# JSON / XML Parsing

## JSON Parsing

In [2]:
# importing the Python json library
import json

### Importing JSON to Dict { }

**From string**

In [3]:
# Example string
json_string = '{"first_name": "Guido", "last_name":"Rossum"}'
json_string

'{"first_name": "Guido", "last_name":"Rossum"}'

In [4]:
parsed_json = json.loads(json_string)
parsed_json

{u'first_name': u'Guido', u'last_name': u'Rossum'}

**From file**

In [5]:
# %load files/example.json

In [6]:
with open('files/example.json', 'r') as f:
    content = f.read()

parsed_json = json.loads(content)
parsed_json

{u'glossary': {u'GlossDiv': {u'GlossList': {u'GlossEntry': {u'Abbrev': u'ISO 8879:1986',
     u'Acronym': u'SGML',
     u'GlossDef': {u'GlossSeeAlso': [u'GML', u'XML'],
      u'para': u'A meta-markup language, used to create markup languages such as DocBook.'},
     u'GlossSee': u'markup',
     u'GlossTerm': u'Standard Generalized Markup Language',
     u'ID': u'SGML',
     u'SortAs': u'SGML'}},
   u'title': u'S'},
  u'title': u'example glossary'}}

### Exporting Dict { } to JSON

In [7]:
# Example dict
d = { 'first_name': 'Guido',
      'second_name': 'Rossum',
      'titles': ['BDFL', 'Developer'],
    }
d

{'first_name': 'Guido',
 'second_name': 'Rossum',
 'titles': ['BDFL', 'Developer']}

In [8]:
json_string = json.dumps(d)
json_string

'{"first_name": "Guido", "titles": ["BDFL", "Developer"], "second_name": "Rossum"}'

## XML Parsing

In [9]:
import xml.etree.ElementTree as ET

### Importing XML to ElementTree

**From string**

In [10]:
# Example string
xml_string = """<catalog>
   <book id="bk101">
      <author>Gambardella, Matthew</author>
      <title>XML Developer's Guide</title>
      <genre>Computer</genre>
   </book>
</catalog>"""
xml_string

'<catalog>\n   <book id="bk101">\n      <author>Gambardella, Matthew</author>\n      <title>XML Developer\'s Guide</title>\n      <genre>Computer</genre>\n   </book>\n</catalog>'

In [11]:
root = ET.fromstring(xml_string)
print root.tag
print root.attrib

catalog
{}


**From file**

In [12]:
# %load files/example.xml

In [13]:
tree = ET.parse('files/example.xml')
root = tree.getroot()
print root.tag
print root.attrib

catalog
{}


### Exporting ElementTree to XML

In [14]:
tree.write("files/example_output.xml")

In [15]:
# %load files/example_output.xml

### Getting items

In [16]:
# Each XML element has a tag, optional attributes, a text and a list of children
# XML Element: <tag attr="attr_value"> text </tag>

print root.tag            # tag    : name of XML element
print root.attrib         # attrib : attribute of XML element
print root.text           # text   : content of XML element
for child in root: # loop through all subchildrens
    print "\t", child.tag, child.attrib, child.text
    for subchild in child:
        print "\t\t", subchild.tag, subchild.attrib, subchild.text
    print
    
children = list(root)     #Get children

print children
print root

catalog
{}

   
	book {'id': 'bk101'} 
      
		author {} Gambardella, Matthew
		title {} XML Developer's Guide
		genre {} Computer
		price {} 44.95
		publish_date {} 2000-10-01
		description {} An in-depth look at creating applications
      with XML.

	book {'id': 'bk103'} 
      
		author {} Corets, Eva
		title {} Maeve Ascendant
		genre {} Fantasy
		price {} 5.95
		publish_date {} 2000-11-17
		description {} After the collapse of a nanotechnology
      society in England, the young survivors lay the
      foundation for a new society.

	book {'id': 'bk109'} 
      
		author {} Kress, Peter
		title {} Paradox Lost
		genre {} Science Fiction
		price {} 6.95
		publish_date {} 2000-11-02
		description {} After an inadvertant trip through a Heisenberg
      Uncertainty Device, James Salway discovers the problems
      of being quantum.

[<Element 'book' at 0x104d22090>, <Element 'book' at 0x104d22b50>, <Element 'book' at 0x104d22d10>]
<Element 'catalog' at 0x104d228d0>


In [17]:
# iter('element_tag') - search all subtrees
for title in root.iter('title'): 
    print title.text

XML Developer's Guide
Maeve Ascendant
Paradox Lost


In [18]:
# findall('element_tag') - get direct childs of parent
for book in root.findall('book'):
    # Get items
    book_id = book.get('id')          # Get attribute 'id' of 'book' element
    title = book.find('title')        # Get first child named 'title'
    author = book.find('author')
    genre = book.find('genre')
    price = book.find('price')
    pdate = book.find('publish_date')
    descr = book.find('description')
    
    # Print items
    print "ID ", book_id
    print title.tag, title.text
    print author.tag, author.text
    print genre.tag, genre.text
    print price.tag, price.text
    print pdate.tag, pdate.text
    print descr.tag, descr.text
    print

ID  bk101
title XML Developer's Guide
author Gambardella, Matthew
genre Computer
price 44.95
publish_date 2000-10-01
description An in-depth look at creating applications
      with XML.

ID  bk103
title Maeve Ascendant
author Corets, Eva
genre Fantasy
price 5.95
publish_date 2000-11-17
description After the collapse of a nanotechnology
      society in England, the young survivors lay the
      foundation for a new society.

ID  bk109
title Paradox Lost
author Kress, Peter
genre Science Fiction
price 6.95
publish_date 2000-11-02
description After an inadvertant trip through a Heisenberg
      Uncertainty Device, James Salway discovers the problems
      of being quantum.



In [19]:
# find('element_tag') - finds first child with tag
for book_content in root.find('book'):
    print book_content

<Element 'author' at 0x104d22950>
<Element 'title' at 0x104d229d0>
<Element 'genre' at 0x104d22a50>
<Element 'price' at 0x104d22a90>
<Element 'publish_date' at 0x104d22ad0>
<Element 'description' at 0x104d22b10>


### Modifying items

In [20]:
# set('attribute', 'attribute_value')
for price in root.iter('price'):
    price.text = str(float(price.text) + 1)
    price.set('increased', 'yes')
    print price.text, price.attrib

45.95 {'increased': 'yes'}
6.95 {'increased': 'yes'}
7.95 {'increased': 'yes'}


In [21]:
# Create new element
new_book = ET.Element('book', {'id': 'bk113'})
new_book.text = "\n"
                      
# Create sub elements
author = ET.SubElement(new_book, 'author')
title = ET.SubElement(new_book, 'title')
genre = ET.SubElement(new_book, 'genre')
price = ET.SubElement(new_book, 'price')
pdate = ET.SubElement(new_book, 'publish_date')
descr = ET.SubElement(new_book, 'description')

# Populate sub elements
author.text = "J.K Rowlings"
title.text = "Harry Potter and the Sorcerer's Stone"
genre.text = "Fantasy"
price.text = "31.50"
pdate.text = "2001-10-16"
descr.text = "A very nice fantasy book."

# Add element to existing tree
root.append(new_book)

# ET.dump(new_book)

In [22]:
# A ten times nicer way of doing it (focus on what matters)

def create_new_ET(element_name, attributes=None, elements=None):
    """Build a new XML element with one text-only sub-element per entry.

    Parameters
    ----------
    element_name : str
        Tag of the element to create.
    attributes : dict, optional
        Attribute name -> value mapping for the new element.
    elements : dict, optional
        Sub-element tag -> text mapping; one child is created per entry.

    Returns
    -------
    xml.etree.ElementTree.Element
        The newly created element (not attached to any tree).
    """
    # Use the requested tag (the previous version always created 'book').
    # None defaults avoid sharing one mutable dict between calls.
    node = ET.Element(element_name, attributes or {})
    for child_tag, child_text in (elements or {}).items():
        ET.SubElement(node, child_tag).text = child_text
    return node

# Create new element
name     = 'book'
attr     = {'id': "bk114"}
elements = {'author': "J.K Rowlings",
            'title': "Harry Potter and the Chamber of Secrets",
            'genre': "Fantasy",
            'price': "35.50",
            'publish_date': "2002-10-15",
            'description': "The second volume of a very nice fantasy book."}
new_book = create_new_ET(name, attr, elements)

# Append to existing tree
root.append(new_book)

# ET.dump(root)

## Bonus: Pretty Print and Conversion

### Pretty Print

In [43]:
def indent(elem, level=0):
    """Pretty-print helper: rewrite whitespace-only text/tail in place.

    Elements with children get a newline plus indentation as text, each
    child is indented one level deeper (two spaces per level), and the
    last child's tail is pulled back to the parent's level so the closing
    tag lines up. Text and tails that contain non-whitespace are kept.
    """
    pad = "\n" + level * "  "

    def _is_blank(value):
        # None, empty or whitespace-only strings may be overwritten
        return not value or not value.strip()

    if not len(elem):
        # Leaf element: only its tail matters, and never for the root
        if level and _is_blank(elem.tail):
            elem.tail = pad
        return

    if _is_blank(elem.text):
        elem.text = pad + "  "
    if _is_blank(elem.tail):
        elem.tail = pad

    last_child = None
    for child in elem:
        indent(child, level + 1)
        last_child = child

    # The last child is followed by the parent's closing tag
    if _is_blank(last_child.tail):
        last_child.tail = pad
            
indent(root)
tree.write("files/example_output.xml")
ET.dump(tree)

<catalog>
  <book id="bk101">
    <author>Gambardella, Matthew</author>
    <title>XML Developer's Guide</title>
    <genre>Computer</genre>
    <price increased="yes">45.95</price>
    <publish_date>2000-10-01</publish_date>
    <description>An in-depth look at creating applications
      with XML.</description>
  </book>
  <book id="bk103">
    <author>Corets, Eva</author>
    <title>Maeve Ascendant</title>
    <genre>Fantasy</genre>
    <price increased="yes">6.95</price>
    <publish_date>2000-11-17</publish_date>
    <description>After the collapse of a nanotechnology
      society in England, the young survivors lay the
      foundation for a new society.</description>
  </book>
  <book id="bk109">
    <author>Kress, Peter</author>
    <title>Paradox Lost</title>
    <genre>Science Fiction</genre>
    <price increased="yes">7.95</price>
    <publish_date>2000-11-02</publish_date>
    <description>After an inadvertant trip through a Heisenberg
      Uncertainty Device, James Salwa

In [24]:
# %load files/example_output.xml

### Etree to Dict { }

In [25]:
from collections import defaultdict

def etree_to_dict(t):
    """Convert an ElementTree element into a nested dict.

    The result is a one-entry dict ``{tag: value}`` where ``value`` is:

    * ``None`` for an element with no children, attributes or text;
    * the stripped text for an element with text only;
    * a dict otherwise, holding child tags (repeated tags become a list),
      attributes under ``'@name'`` keys and non-empty text under ``'#text'``.
    """
    d = {t.tag: {} if t.attrib else None}
    children = list(t)
    if children:
        # Group converted children by tag so repeated tags end up in a list
        dd = defaultdict(list)
        for dc in map(etree_to_dict, children):
            # items() instead of iteritems(): works on Python 2 and 3
            for k, v in dc.items():
                dd[k].append(v)
        d = {t.tag: {k: v[0] if len(v) == 1 else v for k, v in dd.items()}}
    if t.attrib:
        d[t.tag].update(('@' + k, v) for k, v in t.attrib.items())
    if t.text:
        text = t.text.strip()
        if children or t.attrib:
            # Only keep text that is more than layout whitespace
            if text:
                d[t.tag]['#text'] = text
        else:
            d[t.tag] = text
    return d

In [26]:
d = etree_to_dict(root)
d

{'catalog': {'book': [{'@id': 'bk101',
    'author': 'Gambardella, Matthew',
    'description': 'An in-depth look at creating applications\n      with XML.',
    'genre': 'Computer',
    'price': {'#text': '45.95', '@increased': 'yes'},
    'publish_date': '2000-10-01',
    'title': "XML Developer's Guide"},
   {'@id': 'bk103',
    'author': 'Corets, Eva',
    'description': 'After the collapse of a nanotechnology\n      society in England, the young survivors lay the\n      foundation for a new society.',
    'genre': 'Fantasy',
    'price': {'#text': '6.95', '@increased': 'yes'},
    'publish_date': '2000-11-17',
    'title': 'Maeve Ascendant'},
   {'@id': 'bk109',
    'author': 'Kress, Peter',
    'description': 'After an inadvertant trip through a Heisenberg\n      Uncertainty Device, James Salway discovers the problems\n      of being quantum.',
    'genre': 'Science Fiction',
    'price': {'#text': '7.95', '@increased': 'yes'},
    'publish_date': '2000-11-02',
    'title': 'Para

### Dict { } to Etree

In [27]:
def dict_to_etree(d):
    """Build an ElementTree element from a one-entry ``{tag: body}`` dict.

    Inside ``body``, keys starting with ``'@'`` become attributes, the key
    ``'#text'`` becomes the element text, list values produce one child per
    item, and every other key produces a single child element. A plain
    string body becomes the element text; falsy bodies add nothing.
    """
    def _fill(payload, node):
        # None, '' and empty containers contribute nothing
        if not payload:
            return
        if isinstance(payload, basestring):
            node.text = payload
            return
        if not isinstance(payload, dict):
            assert payload == 'invalid type', (type(payload), payload)
            return
        for key, value in payload.items():
            assert isinstance(key, basestring)
            if key.startswith('#'):
                assert key == '#text' and isinstance(value, basestring)
                node.text = value
            elif key.startswith('@'):
                assert isinstance(value, basestring)
                node.set(key[1:], value)
            elif isinstance(value, list):
                # Repeated tag: one sub-element per list item
                for item in value:
                    _fill(item, ET.SubElement(node, key))
            else:
                _fill(value, ET.SubElement(node, key))

    assert isinstance(d, dict) and len(d) == 1
    (tag, body), = d.items()
    top = ET.Element(tag)
    _fill(body, top)
    return top

In [28]:
t = dict_to_etree(d)
indent(t)
ET.dump(t)

<catalog>
  <book id="bk101">
    <description>An in-depth look at creating applications
      with XML.</description>
    <author>Gambardella, Matthew</author>
    <price increased="yes">45.95</price>
    <title>XML Developer's Guide</title>
    <publish_date>2000-10-01</publish_date>
    <genre>Computer</genre>
  </book>
  <book id="bk103">
    <description>After the collapse of a nanotechnology
      society in England, the young survivors lay the
      foundation for a new society.</description>
    <author>Corets, Eva</author>
    <price increased="yes">6.95</price>
    <title>Maeve Ascendant</title>
    <publish_date>2000-11-17</publish_date>
    <genre>Fantasy</genre>
  </book>
  <book id="bk109">
    <description>After an inadvertant trip through a Heisenberg
      Uncertainty Device, James Salway discovers the problems
      of being quantum.</description>
    <author>Kress, Peter</author>
    <price increased="yes">7.95</price>
    <title>Paradox Lost</title>
    <publish_dat

## Exercise: Analyze XML file
### Problem
**Objectives:** 
- Extract Maven plugins information from ```base-corporate-pom```.
- Add a new Maven plugin to ```base-corporate-pom```.
- Write Maven plugins information to a new file ```files/pom_maven_plugins.xml```

**Information:**

- ```base-corporate-pom``` pom.xml is located in ```files/pom.xml```


- XML plugin structure:
```
<project>
    <!-- Plugin version -->
    <properties>
        <maven-surefire-plugin.version>2.12.4</maven-surefire-plugin.version>
    </properties>
    <!-- Plugin info -->
    <build>
        <pluginManagement>
            <plugins>
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-surefire-plugin</artifactId>
                    <version>${maven-surefire-plugin.version}</version>
                 </plugin>
            </plugins>
         </pluginManagement>
    </build>
</project>
```
- Some plugins don't have a ```<version>``` or a ```<groupId>``` tag. Print "N/A" when there is no tag.
- All plugins have an ```<artifactId>``` tag.

**Desired output:**

maven-surefire

    groupId: org.apache.maven.plugins
    artifactId: maven-surefire-plugin
    version: 2.12.4

### Solution

In [29]:
import xml.etree.ElementTree as ET

root = ET.parse('files/pom.xml').getroot()

# Variables
plugin_dict = {}
plugin_list = []
# Namespace prefix in ElementTree "{uri}" form, prepended to every tag name
base = "{http://maven.apache.org/POM/4.0.0}"

# Find 'build' and 'properties' XML tags in root
build = root.find(base + 'build')
properties = root.find(base + 'properties')

# Find 'pluginManagement' XML tag in 'build'
pluginManagement = build.find(base + 'pluginManagement')

# Find 'plugins' XML tag in 'pluginManagement'
plugins = pluginManagement.find(base + 'plugins')

# Populate plugin_list
plugin_list = list(plugins)

# Loop through plugins (each one is an Element)
for plugin in plugin_list:

    # Every plugin has an artifactId; the display name drops the '-plugin' suffix
    artifactId_str = plugin.find(base + 'artifactId').text
    name = artifactId_str.replace('-plugin', '')

    # Not all plugins have a groupId !
    groupId = plugin.find(base + 'groupId')
    if groupId is not None and groupId.text:
        groupId_str = groupId.text
    else:
        groupId_str = "N/A"

    # Not all plugins have a version !
    version_str = "N/A"
    version = plugin.find(base + 'version')
    if version is not None and version.text:
        version_text = version.text.strip()
        if version_text.startswith('${') and version_text.endswith('}'):
            # '${some.property}' -> look 'some.property' up in <properties>
            prop = None
            if properties is not None:
                prop = properties.find(base + version_text[2:-1])
            if prop is not None and prop.text:
                version_str = prop.text
        elif version_text:
            # Literal version: use it as written instead of slicing it
            version_str = version_text

    # Print plugin information
    print(name)
    print("\t" + "groupId: " + groupId_str)
    print("\t" + "artifactId: " + artifactId_str)
    print("\t" + "version: " + version_str)

    # Add plugin to dictionary
    plugin_dict[name] = {'artifactId': artifactId_str,
                         'groupId': groupId_str,
                         'version': version_str
                         }

# Write dict to file once, after all plugins have been collected
# (previously the whole file was rewritten on every loop iteration)
with open('files/pom_maven_plugins_output.xml', 'w') as f:
    for pname, sub in plugin_dict.items():
        f.write(pname + "\n")
        for k, v in sub.items():
            f.write("\t" + k + ": " + v + "\n")

maven-surefire
	groupId: org.apache.maven.plugins
	artifactId: maven-surefire-plugin
	version: 2.12.4
properties-maven
	groupId: org.codehaus.mojo
	artifactId: properties-maven-plugin
	version: 1.0-alpha-2
maven-release
	groupId: org.apache.maven.plugins
	artifactId: maven-release-plugin
	version: 2.2.1
maven-install
	groupId: org.apache.maven.plugins
	artifactId: maven-install-plugin
	version: 2.4
maven-deploy
	groupId: org.apache.maven.plugins
	artifactId: maven-deploy-plugin
	version: 2.7
versions-maven
	groupId: org.codehaus.mojo
	artifactId: versions-maven-plugin
	version: 2.1
ccc-versions-maven
	groupId: com.cccis.build.maven
	artifactId: ccc-versions-maven-plugin
	version: 0.0.38
oc4j-admin-maven
	groupId: com.cccis.build.maven
	artifactId: oc4j-admin-maven-plugin
	version: 0.0.37
weblogic-maven
	groupId: com.oracle.weblogic
	artifactId: weblogic-maven-plugin
	version: 10.3.4
wls-maven
	groupId: com.oracle.weblogic
	artifactId: wls-maven-plugin
	version: 12.1.1.0
gmaven
	groupId

### Solution 2 (improved)

In [30]:
import xml.etree.ElementTree as ET
import re

def concat(list_tags):
    """Prefix every tag name with the module-level ``base``.

    Returns a tuple with one prefixed name per entry of ``list_tags``,
    in the same order.
    """
    return tuple(base + tag for tag in list_tags)

def build_path(list_, base = ''):
    """Join path segments into a relative path starting with ``.``.

    Each segment is appended as ``/<base><segment>``; when ``base`` is
    empty or otherwise falsy the segment is appended without a prefix.
    An empty ``list_`` yields ``"."``.
    """
    prefix = base if base else ''
    segments = ["/" + prefix + part for part in list_]
    return "." + "".join(segments)

def get_plugin_info(plugin):
    """Extract the identifying fields of one ``<plugin>`` element.

    Reads the module-level ``base`` (namespace prefix) and ``properties``
    (the POM ``<properties>`` element) to resolve ``${...}`` versions.

    Returns
    -------
    tuple
        ``(artifactId, groupId, version, name)`` as strings. ``groupId``
        and ``version`` are ``"N/A"`` when the tag is missing or empty, or
        when a ``${...}`` placeholder cannot be resolved. ``name`` is the
        artifactId with ``'-plugin'`` removed.

    Raises
    ------
    AttributeError
        If the plugin has no ``<artifactId>`` child.
    """
    # Get concatenated names
    names = ['artifactId', 'groupId', 'version']
    aId, gId, v = concat(names)

    # Get elements from plugin ET
    artifactId = plugin.find(aId)
    groupId = plugin.find(gId)
    version = plugin.find(v)

    # artifactId is mandatory: a missing tag raises AttributeError here
    aId_str = artifactId.text

    if groupId is not None and groupId.text:
        gId_str = groupId.text
    else:
        gId_str = "N/A"

    version_str = "N/A"
    if version is not None and version.text:
        raw = version.text.strip()
        if raw.startswith('${') and raw.endswith('}'):
            # '${some.property}' -> look 'some.property' up in <properties>
            prop = None
            if properties is not None:
                prop = properties.find(base + raw[2:-1])
            if prop is not None and prop.text:
                version_str = prop.text
        elif raw:
            # Literal version (e.g. '2.12.4'): keep it as written
            version_str = raw

    # Return plugin information
    name_str = aId_str.replace('-plugin','')
    return (aId_str, gId_str, version_str, name_str)

def format_plugin_info(plugin):
    """Return the multi-line text summary of a plugin.

    The first line is the plugin name; the groupId, artifactId and version
    follow, each on its own tab-indented line. No trailing newline.
    """
    artifactId, groupId, version, name = get_plugin_info(plugin)
    summary_lines = [
        name,
        "\t" + "groupId: " + groupId,
        "\t" + "artifactId: " + artifactId,
        "\t" + "version: " + version,
    ]
    return "\n".join(summary_lines)

def print_plugin(plugin):
    """Print the formatted summary of a plugin to standard output."""
    summary = format_plugin_info(plugin)
    print(summary)

def write_plugin_to_file(plugin, filepath):
    """Append the formatted summary of a plugin to ``filepath``.

    The file is opened in append mode, so existing content is kept. Each
    record ends with a newline so consecutive plugins do not run together
    on one line.
    """
    to_print = format_plugin_info(plugin)
    with open(filepath, 'a') as f:
        f.write(to_print + "\n")

# main function
if __name__ == '__main__':
    root = ET.parse('files/pom.xml').getroot()
    base = "{http://maven.apache.org/POM/4.0.0}"
    output_path = 'files/pom_maven_plugins_output.xml'

    # Generate plugins path and properties path
    plugins_path    = build_path(['build', 'pluginManagement', 'plugins'], base=base)
    properties_path = build_path(['properties'], base=base)

    # Get plugins ET and properties ET.
    # get_plugin_info() reads the module-level `properties`, so it has to be
    # assigned here rather than left over from an earlier cell.
    plugins = root.find(plugins_path)
    properties = root.find(properties_path)

    # write_plugin_to_file() appends, so start from an empty file to keep
    # reruns from accumulating duplicate entries
    with open(output_path, 'w'):
        pass

    # Get plugins info and print plugins
    for p in plugins.findall(base + 'plugin'):
        print_plugin(p)
        write_plugin_to_file(p, output_path)

maven-surefire
	groupId: org.apache.maven.plugins
	artifactId: maven-surefire-plugin
	version: 2.12.4
properties-maven
	groupId: org.codehaus.mojo
	artifactId: properties-maven-plugin
	version: 1.0-alpha-2
maven-release
	groupId: org.apache.maven.plugins
	artifactId: maven-release-plugin
	version: 2.2.1
maven-install
	groupId: org.apache.maven.plugins
	artifactId: maven-install-plugin
	version: 2.4
maven-deploy
	groupId: org.apache.maven.plugins
	artifactId: maven-deploy-plugin
	version: 2.7
versions-maven
	groupId: org.codehaus.mojo
	artifactId: versions-maven-plugin
	version: 2.1
ccc-versions-maven
	groupId: com.cccis.build.maven
	artifactId: ccc-versions-maven-plugin
	version: 0.0.38
oc4j-admin-maven
	groupId: com.cccis.build.maven
	artifactId: oc4j-admin-maven-plugin
	version: 0.0.37
weblogic-maven
	groupId: com.oracle.weblogic
	artifactId: weblogic-maven-plugin
	version: 10.3.4
wls-maven
	groupId: com.oracle.weblogic
	artifactId: wls-maven-plugin
	version: 12.1.1.0
gmaven
	groupId

### Solution 3 (OOP Approach)

In [32]:
import xml.etree.ElementTree as ET
import re

class Plugin(object):
    """A Maven plugin entry: name, artifactId, groupId and version.

    Parameters
    ----------
    plugin : xml.etree.ElementTree.Element
        The ``<plugin>`` element to read.
    base : str
        Namespace prefix in ElementTree ``{uri}`` form, prepended to every
        tag name that is looked up.
    properties : xml.etree.ElementTree.Element, optional
        The POM ``<properties>`` element used to resolve ``${...}`` version
        placeholders. Without it, placeholder versions are reported as "N/A".
    """
    def __init__(self, plugin, base, properties=None):
        self.base = base
        self.properties = properties
        self.__get_plugin_info(plugin)

    def __repr__(self):
        to_print = self.name + "\n" + \
        "\t" + "groupId: " + self.groupId + "\n" + \
        "\t" + "artifactId: " + self.artifactId + "\n" + \
        "\t" + "version: " + self.version
        return to_print

    # PRIVATE
    def __concat(self, list_tags):
        """Return a tuple of the given tag names, each prefixed with self.base."""
        return tuple(self.base + tag for tag in list_tags)

    def __resolve_version(self, version):
        """Turn a ``<version>`` element (or None) into a version string.

        ``${name}`` placeholders are looked up in self.properties; literal
        versions are returned as written; anything missing yields "N/A".
        """
        if version is None or not version.text:
            return "N/A"
        raw = version.text.strip()
        if not raw:
            return "N/A"
        if raw.startswith('${') and raw.endswith('}'):
            if self.properties is None:
                return "N/A"
            prop = self.properties.find(self.base + raw[2:-1])
            if prop is None or not prop.text:
                return "N/A"
            return prop.text
        return raw

    def __get_plugin_info(self, plugin):
        """Read a ``<plugin>`` element and populate this Plugin object.

        Fields set: name, artifactId, groupId, version.
        Raises AttributeError if the plugin has no usable ``<artifactId>``.
        """
        # Get concatenated names
        names = ['artifactId', 'groupId', 'version']
        aId, gId, v = self.__concat(names)

        # Get elements from plugin ET
        artifactId = plugin.find(aId)
        groupId = plugin.find(gId)
        version = plugin.find(v)

        # artifactId is mandatory
        if artifactId is None or not artifactId.text:
            raise AttributeError("plugin element has no <artifactId>")
        aId_str = artifactId.text

        # groupId is optional
        if groupId is not None and groupId.text:
            gId_str = groupId.text
        else:
            gId_str = "N/A"

        # Populate Plugin object
        self.artifactId = aId_str
        self.groupId = gId_str
        self.version = self.__resolve_version(version)
        self.name = aId_str.replace('-plugin','')

class POMPluginExtractor(object):
    """Extract the plugins listed under build/pluginManagement/plugins of a POM.

    Parameters
    ----------
    filepath : str
        Path of the POM file; it is parsed when the object is created.
    base : str
        Namespace prefix in ElementTree ``{uri}`` form.
    """
    def __init__(self, filepath, base):
        self.filepath = filepath
        self.base = base
        self.plugins = []
        self.__get_root()
        self.__get_plugins()

    def print_plugins(self):
        """Print every extracted plugin to standard output."""
        for p in self.plugins:
            print(p)

    def save_plugins(self, filepath):
        """Write every plugin to ``filepath``, each followed by a blank line."""
        with open(filepath, 'w') as f:
            for p in self.plugins:
                f.write(str(p) + "\n")
                f.write("\n")

        print("Plugins saved to %s" % filepath)

    # PRIVATE
    def __get_root(self):
        """Parse the POM and keep its root and ``<properties>`` elements."""
        self.root = ET.parse(self.filepath).getroot()
        # Needed by Plugin to resolve '${...}' versions; None if absent
        self.properties = self.root.find(self.base + 'properties')

    def __get_plugins(self):
        """Fill self.plugins with one Plugin per ``<plugin>`` element."""
        plugins_path = self.__build_path(['build', 'pluginManagement', 'plugins'], base=self.base)
        plugins = self.root.find(plugins_path)
        if plugins is None:
            # No pluginManagement/plugins section: nothing to extract
            return
        for p in plugins.findall(self.base + 'plugin'):
            self.plugins.append(Plugin(p, self.base, self.properties))

    def __build_path(self, list_, base = ''):
        """Join segments into a './a/b' style path, prefixing each with base."""
        path = "."
        for a in list_:
            if base:
                path += "/" + base + a
                continue
            path += "/" + a
        return path

# main function
if __name__ == '__main__':
    # Namespace prefix prepended to every POM tag name
    base = "{http://maven.apache.org/POM/4.0.0}"
    inputFile, outputFile = 'files/pom.xml', 'files/pom_maven_plugins_output.xml'

    # Parse the POM, then save the plugins before printing them
    analyzer = POMPluginExtractor(inputFile, base)
    analyzer.save_plugins(outputFile)
    analyzer.print_plugins()

Plugins saved to files/pom_maven_plugins.xml
maven-surefire
	groupId: org.apache.maven.plugins
	artifactId: maven-surefire-plugin
	version: 2.12.4
properties-maven
	groupId: org.codehaus.mojo
	artifactId: properties-maven-plugin
	version: 1.0-alpha-2
maven-release
	groupId: org.apache.maven.plugins
	artifactId: maven-release-plugin
	version: 2.2.1
maven-install
	groupId: org.apache.maven.plugins
	artifactId: maven-install-plugin
	version: 2.4
maven-deploy
	groupId: org.apache.maven.plugins
	artifactId: maven-deploy-plugin
	version: 2.7
versions-maven
	groupId: org.codehaus.mojo
	artifactId: versions-maven-plugin
	version: 2.1
ccc-versions-maven
	groupId: com.cccis.build.maven
	artifactId: ccc-versions-maven-plugin
	version: 0.0.38
oc4j-admin-maven
	groupId: com.cccis.build.maven
	artifactId: oc4j-admin-maven-plugin
	version: 0.0.37
weblogic-maven
	groupId: com.oracle.weblogic
	artifactId: weblogic-maven-plugin
	version: 10.3.4
wls-maven
	groupId: com.oracle.weblogic
	artifactId: wls-ma

# REST API (Django)

## Serialization

## Requests and Responses

## Class Based view

## Authentication and Permission

## Relationships and Hyperlink APIs

## Viewsets and routers

## Example: Twitter REST API

# SQL Data Access (MySQL)

In [33]:
import mysql.connector

## Connecting to MySQL DB

In [34]:
cnx = mysql.connector.connect(user='root', password='password',
                              host='localhost')
cnx.close()
cnx

<mysql.connector.connection.MySQLConnection at 0x1050498d0>

**Lots of connection arguments**

In [35]:
config = {
  'user': 'root',
  'password': 'password',
  'host': 'localhost',
  'database': 'tutorial',
  'raise_on_warnings': True,
}

cnx = mysql.connector.connect(**config)

cnx.close()
cnx

<mysql.connector.connection.MySQLConnection at 0x105049bd0>

**Handling connection errors**

In [36]:
from mysql.connector import errorcode

config = {
  'user': 'root',
  'password': 'wrong_password',
  'host': 'localhost',
  'database': 'tutorial',
  'raise_on_warnings': True,
}
try:
    cnx = mysql.connector.connect(**config)

except mysql.connector.Error as err:
    if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
        print "Something is wrong with you user name or password"
    elif err.errno == errorcode.ER_BAD_DB_ERROR:
        print "Database does not exist."
    else:
        print(err)
else:
    cnx.close()

Something is wrong with you user name or password


**In a function**

In [42]:
config = {
  'user': 'root',
  'password': 'wrong_password',
  'host': 'localhost',
  'database': 'tutorial',
  'raise_on_warnings': True,
}

def connect_mysql(**config):
    """Try to open a MySQL connection and report the outcome.

    ``config`` is passed unchanged to ``mysql.connector.connect``. On
    success a confirmation is printed and the connection is closed again;
    on failure a message describing the error is printed. Nothing is
    returned.
    """
    try:
        cnx = mysql.connector.connect(**config)

    except mysql.connector.Error as err:
        if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
            # Message typo fixed: "you user name" -> "your user name"
            print("Something is wrong with your user name or password")
        elif err.errno == errorcode.ER_BAD_DB_ERROR:
            print("Database does not exist.")
        else:
            print(err)
    else:
        print("Connection to database MySQL server successful.")
        cnx.close()

if __name__ == '__main__':
    connect_mysql(**config)

Connection to database MySQL server successful.


## Creating / Deleting a database

**Note: All DDL (Data Definition Language) statements are executed using a handle structure known as 'cursor'.**

In [38]:
def create_database(cursor, db_name):
    """Create database ``db_name`` with utf8 as default character set.

    The statement is run through ``cursor.execute``. If MySQL reports an
    error, it is printed and ``exit(1)`` is called.
    """
    statement = "CREATE DATABASE {} DEFAULT CHARACTER SET 'utf8'".format(db_name)
    try:
        cursor.execute(statement)
    except mysql.connector.Error as err:
        message = "Failed creating database: {}".format(err)
        print(message)
        exit(1)

        
def get_database(cnx, db_name):
    """Select database ``db_name`` on connection ``cnx``, creating it if needed.

    If selecting fails because the database does not exist, it is created
    with create_database() and selected again. Any other MySQL error is
    printed and ``exit(1)`` is called.
    """
    cursor = cnx.cursor()
    try:
        cnx.database = db_name
    except mysql.connector.Error as err:
        if err.errno != errorcode.ER_BAD_DB_ERROR:
            print(err)
            exit(1)
        else:
            print("Database does not exist! Creating database...")
            create_database(cursor, db_name)
            cnx.database = db_name

if __name__ == '__main__':
    cnx = mysql.connector.connect(user='root', password='password')
    get_database(cnx, 'new_db')
    print cnx.database

new_db


## Creating / Deleting a table

**Using raw ddl**

In [39]:
# Variables
table_name = "employees"
table_ddl = "CREATE TABLE employees (first_name VARCHAR(25) PRIMARY KEY, last_name VARCHAR(25))"

In [40]:
# create_table(cursor, table_name, table_ddl)
def create_table(cursor, table_name, table_ddl):
    """Run ``table_ddl`` on ``cursor`` and print the outcome.

    ``table_name`` is only used in the progress message. Prints "OK" on
    success, "already exists." when MySQL reports that the table exists,
    and the error message for any other MySQL error.
    """
    try:
        print("Creating table %s ..." % table_name)
        cursor.execute(table_ddl)
    except mysql.connector.Error as err:
        if err.errno != errorcode.ER_TABLE_EXISTS_ERROR:
            print(err.msg)
        else:
            print("already exists.")
    else:
        print("OK")
            
if __name__ == '__main__':
    cnx = mysql.connector.connect(user='root', password='password', database='new_db')
    cursor = cnx.cursor()
    create_table(cursor, table_name, table_ddl)

Creating table employees ...
already exists.


**Using Python tuples and dict**

In [58]:
employees_table_descr = (
    "emp_no int(11) NOT NULL AUTO_INCREMENT",
    "birth_date date NOT NULL",
    "first_name varchar(14) NOT NULL",
    "last_name varchar(16) NOT NULL",
    "gender enum('M', 'F') NOT NULL",
    "hire_date date NOT NULL",
    "PRIMARY KEY (emp_no)"
)
    
salaries_table_descr = (
    "emp_no int(11) NOT NULL",
    "salary int(11) NOT NULL",
    "from_date date NOT NULL",
    "to_date date NOT NULL",
    "PRIMARY KEY (emp_no, from_date), KEY emp_no (emp_no)"
)

In [56]:
# create_table_from_tuple(cursor, table_name, kwargs)  
def create_table_from_tuple(cursor, name, kwargs):
    """Create table ``name`` from a sequence of column/constraint definitions.

    ``kwargs`` is an iterable of strings; they are joined with commas inside
    a ``CREATE TABLE`` statement, which is handed to create_table().
    """
    columns = ','.join(kwargs)
    statement = "".join(["CREATE TABLE %s (" % name, columns, ")"])
    create_table(cursor, name, statement)

if __name__ == '__main__':
    create_table_from_tuple(cursor, 'employees', employees_table_descr)
    create_table_from_tuple(cursor, 'salaries', salaries_table_descr)

Creating table employees ...
OK
Creating table salaries ...
OK


In [62]:
# create_tables_from_dict(cursor, d)
def create_tables_from_dict(cursor, d):
    """Create one table per entry of ``d`` (table name -> definitions tuple)."""
    for table_name, table_descr in d.items():
        create_table_from_tuple(cursor, table_name, table_descr)

if __name__ == '__main__':  
    d = {
        'employees': employees_table_descr,
        'salaries': salaries_table_descr
    }
    create_tables_from_dict(cursor, d)

Creating table employees ...
OK
Creating table salaries ...
OK


## Populating a table

## Querying data from a table

## OOP Approach

In [None]:
# OOP approach
class Database(object):
    """A named database reached through a MySQL connection.

    Parameters
    ----------
    name : str
        Name of the database.
    cnx : connection object
        Open connection; a cursor is created from it when the object is built.
    """
    def __init__(self, name, cnx):
        self.name = name
        self.cnx = cnx
        self.cursor = cnx.cursor()
        self.tables = []    # Table objects created through create_table()

    def get(self):
        """Select this database on the connection, creating it if missing.

        Any MySQL error other than "database does not exist" is printed and
        ``exit(1)`` is called.
        """
        try:
            self.cnx.database = self.name
        except mysql.connector.Error as err:
            if err.errno == errorcode.ER_BAD_DB_ERROR:
                print("Database does not exist! Creating database...")
                self.create()
                # Use this object's connection, not a module-level `cnx`
                self.cnx.database = self.name
            else:
                print(err)
                exit(1)

    def create(self):
        """Create the database on the server (utf8 default character set).

        A MySQL error is printed and ``exit(1)`` is called.
        """
        try:
            # Use this object's cursor, not a module-level `cursor`
            self.cursor.execute(
                "CREATE DATABASE {} DEFAULT CHARACTER SET 'utf8'".format(self.name))
        except mysql.connector.Error as err:
            print("Failed creating database: {}".format(err))
            exit(1)

    def delete(self):
        """Delete an existing database (not implemented yet)."""
        pass

    def create_table(self, name, descr):
        """Create a table by executing the DDL statement ``descr``.

        On success a Table(name, descr) is appended to self.tables and "OK"
        is printed; MySQL errors are printed instead of raised.
        """
        try:
            print("Creating table %s ..." % name)
            self.cursor.execute(descr)
        except mysql.connector.Error as err:
            if err.errno == errorcode.ER_TABLE_EXISTS_ERROR:
                print("already exists.")
            else:
                print(err.msg)
        else:
            self.tables.append(Table(name, descr))
            print("OK")

    def delete_table(self, name):
        """Delete an existing table from the database (not implemented yet)."""
        pass
        
class Table(object):
    """A table identified by its name and its description.

    ``descr`` is stored as given; its format is decided by the caller.
    """
    def __init__(self, name, descr):
        self.name = name
        self.descr = descr

    def describe(self):
        """Return the ``DESCRIBE`` statement for this table.

        The table holds no cursor, so the statement is returned for the
        caller to execute (it used to be built and then discarded).
        """
        ddl = "DESCRIBE %s" % self.name
        return ddl
        
if __name__ == '__main__':
    cnx = mysql.connector.connect(user='root', password='password')
    # `table_descr_tuple` was never defined in this notebook; use the
    # employees description tuple defined in the table-creation section
    Table('employee', employees_table_descr)

# NoSQL Data Access (DynamoDB)

In [None]:
import boto3

## Connecting to DynamoDB

## Creating / Deleting a table

## Populating a table

## Querying data from a table

# Exercise: Parse a switch record and output to MySQL / DynamoDB