In [3]:
# Sample XML document (three <country> records under a <data type="daily"> root) used
# by the parsing examples below.
# NOTE(review): the name `str` shadows Python's builtin `str` for the rest of the kernel
# session; a later cell reads this name, so renaming it needs a coordinated change there.
str = '<?xml version="1.0"?>\
<data type="daily">\
    <country name="Liechtenstein">\
        <rank>1</rank>\
        <year>2008</year>\
        <gdppc>141100</gdppc>\
        <neighbor name="Austria" direction="E"/>\
        <neighbor name="Switzerland" direction="W"/>\
    </country>\
    <country name="Singapore">\
        <rank>4</rank>\
        <year>2011</year>\
        <gdppc>59900</gdppc>\
        <neighbor name="Malaysia" direction="N"/>\
    </country>\
    <country name="Panama">\
        <rank>68</rank>\
        <year>2011</year>\
        <gdppc>13600</gdppc>\
        <neighbor name="Costa Rica" direction="W"/>\
        <neighbor name="Colombia" direction="E"/>\
    </country>\
</data>'

In [4]:
import xml.etree.ElementTree as ET
# Parse the XML text held in `str`; fromstring returns the document's top-level Element.
root = ET.fromstring(str)

In [5]:
# Display the parsed top-level element.
root

<Element 'data' at 0x7f391e3c9c28>

In [6]:
# Tag name of the top-level element.
root.tag

'data'

In [7]:
# Attributes of the top-level element, exposed as a dict.
root.attrib

{'type': 'daily'}

In [8]:
# Look up a single attribute of the top-level element by name.
root.attrib['type']

'daily'

In [9]:
# Iterating an Element walks its direct children; print each one's `name` attribute.
for country in root:
    print(country.attrib['name'])

Liechtenstein
Singapore
Panama


In [10]:
# For every direct child, collect its `name` attribute plus the text of its
# <rank> and <year> sub-elements into one dict and print it.
for country in root:
    country_info = {
        'name': country.get('name'),
        'rank': country.find('rank').text,
        'year': country.find('year').text,
    }
    print(country_info)


{'name': 'Liechtenstein', 'rank': '1', 'year': '2008'}
{'name': 'Singapore', 'rank': '4', 'year': '2011'}
{'name': 'Panama', 'rank': '68', 'year': '2011'}


In [28]:
# Sample <Trades> document: every <Trade> carries CorrelationId, NumberOfTrades, Limit
# and TradeID as attributes, and a numeric value as its element text.
trades = '<Trades>\
<Trade CorrelationId="234" NumberOfTrades="3" Limit="1000" TradeID="654">100</Trade>\
<Trade CorrelationId="234" NumberOfTrades="3" Limit="1000" TradeID="135">200</Trade>\
<Trade CorrelationId="222" NumberOfTrades="1" Limit="500" TradeID="423">600</Trade>\
<Trade CorrelationId="234" NumberOfTrades="3" Limit="1000" TradeID="652">200</Trade>\
<Trade CorrelationId="200" NumberOfTrades="2" Limit="1000" TradeID="645">1000</Trade>\
</Trades>'

In [None]:
# Variant of the <Trades> sample in which the FIRST <Trade> stores its fields as child
# elements (<CorrelationId>, <NumberOfTrades>, <Limit>, <TradeID>) instead of attributes;
# the remaining records still use attributes.
# NOTE(review): this cell has no execution count and rebinds `trades`. The extraction loop
# in this notebook reads fields with Element.get(), which only sees attributes, so the first
# record would presumably come back with None values if this cell were run -- confirm
# whether this experiment should be kept.
trades = '<Trades>\
<Trade><CorrelationId>234</CorrelationId><NumberOfTrades>3</NumberOfTrades><Limit>1000</Limit><TradeID>654</TradeID>100</Trade>\
<Trade CorrelationId="234" NumberOfTrades="3" Limit="1000" TradeID="135">200</Trade>\
<Trade CorrelationId="222" NumberOfTrades="1" Limit="500" TradeID="423">600</Trade>\
<Trade CorrelationId="234" NumberOfTrades="3" Limit="1000" TradeID="652">200</Trade>\
<Trade CorrelationId="200" NumberOfTrades="2" Limit="1000" TradeID="645">1000</Trade>\
</Trades>'

In [29]:
# Parse the trades XML text. This rebinds `root`, which an earlier cell bound to the
# parsed country document, so cells that expect the country data must not be re-run
# after this one without re-parsing.
root = ET.fromstring(trades)
# Tag name of the new top-level element.
root.tag

'Trades'

In [30]:
# Flatten every child of `root` into a dict: the four attributes are read by name
# (a missing attribute yields None) and the element text is stored under 'Amount'.
# NOTE: `trades` is rebound here from the XML text to the list of record dicts.
attr_names = ('CorrelationId', 'NumberOfTrades', 'Limit', 'TradeID')
trades = []
for trade_el in root:
    record = {key: trade_el.get(key) for key in attr_names}
    record['Amount'] = trade_el.text
    print(record)
    trades.append(record)

print(trades)

{'CorrelationId': '234', 'NumberOfTrades': '3', 'Limit': '1000', 'TradeID': '654', 'Amount': '100'}
{'CorrelationId': '234', 'NumberOfTrades': '3', 'Limit': '1000', 'TradeID': '135', 'Amount': '200'}
{'CorrelationId': '222', 'NumberOfTrades': '1', 'Limit': '500', 'TradeID': '423', 'Amount': '600'}
{'CorrelationId': '234', 'NumberOfTrades': '3', 'Limit': '1000', 'TradeID': '652', 'Amount': '200'}
{'CorrelationId': '200', 'NumberOfTrades': '2', 'Limit': '1000', 'TradeID': '645', 'Amount': '1000'}
[{'CorrelationId': '234', 'NumberOfTrades': '3', 'Limit': '1000', 'TradeID': '654', 'Amount': '100'}, {'CorrelationId': '234', 'NumberOfTrades': '3', 'Limit': '1000', 'TradeID': '135', 'Amount': '200'}, {'CorrelationId': '222', 'NumberOfTrades': '1', 'Limit': '500', 'TradeID': '423', 'Amount': '600'}, {'CorrelationId': '234', 'NumberOfTrades': '3', 'Limit': '1000', 'TradeID': '652', 'Amount': '200'}, {'CorrelationId': '200', 'NumberOfTrades': '2', 'Limit': '1000', 'TradeID': '645', 'Amount': '10

In [31]:
import pandas as pd

# Build the frame from the list of record dicts and key it by TradeID in one expression
# (set_index drops the column from the data by default).
df = pd.DataFrame(trades).set_index('TradeID')

In [32]:
# Print the frame's summary (index, columns, non-null counts, dtypes).
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 654 to 645
Data columns (total 4 columns):
Amount            5 non-null object
CorrelationId     5 non-null object
Limit             5 non-null object
NumberOfTrades    5 non-null object
dtypes: object(4)
memory usage: 200.0+ bytes


In [33]:
# Convert the numeric columns in a single astype call:
# Amount and Limit become floats, NumberOfTrades becomes an integer.
df = df.astype({'Amount': float, 'Limit': float, 'NumberOfTrades': int})
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 654 to 645
Data columns (total 4 columns):
Amount            5 non-null float64
CorrelationId     5 non-null object
Limit             5 non-null float64
NumberOfTrades    5 non-null int64
dtypes: float64(2), int64(1), object(1)
memory usage: 200.0+ bytes


In [34]:
# One row per CorrelationId: how many trades were seen (count), the largest declared
# NumberOfTrades, the summed Amount and the largest Limit. The columns are then renamed
# so the row count takes the name 'NumberOfTrades' and the declared figure becomes
# 'MaxNumberOfTrades'.
dfg = (
    df.groupby('CorrelationId')
    .agg({'CorrelationId': 'count', 'NumberOfTrades': 'max', 'Amount': 'sum', 'Limit': 'max'})
    .rename(columns={
        'CorrelationId': 'NumberOfTrades',
        'NumberOfTrades': 'MaxNumberOfTrades',
        'Amount': 'ActualTotalAmount',
    })
)

In [35]:
def tstate(row):
    """Classify one aggregated row as 'Rejected', 'Pending' or 'Accepted'.

    ``row`` must support lookup by the keys 'NumberOfTrades',
    'MaxNumberOfTrades', 'ActualTotalAmount' and 'Limit'.

    Returns
    -------
    str
        'Rejected' when NumberOfTrades exceeds MaxNumberOfTrades or
        ActualTotalAmount exceeds Limit; otherwise 'Pending' when
        NumberOfTrades is below MaxNumberOfTrades; otherwise 'Accepted'.
    """
    seen, expected = row['NumberOfTrades'], row['MaxNumberOfTrades']
    # Rejection takes precedence over every other state.
    if seen > expected or row['ActualTotalAmount'] > row['Limit']:
        return 'Rejected'
    return 'Pending' if seen < expected else 'Accepted'

# Classify every group row-wise with tstate and attach the result as the 'State' column.
dfg = dfg.assign(State=dfg.apply(tstate, axis=1))

In [36]:
# Copy the group key from the index into a regular column so it can be exported.
dfg['CorrelationId'] = dfg.index
# Render the three report columns as CSV text, ordered by the index, without the index itself.
dfg.sort_index().to_csv(columns=['CorrelationId', 'NumberOfTrades', 'State'], index=False)

'CorrelationId,NumberOfTrades,State\n200,1,Pending\n222,1,Rejected\n234,3,Accepted\n'

In [37]:
# Display the grouped frame with its index sorted in descending order.
dfg.sort_index(ascending=False)

Unnamed: 0_level_0,NumberOfTrades,MaxNumberOfTrades,ActualTotalAmount,Limit,State,CorrelationId
CorrelationId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
234,3,3,500.0,1000.0,Accepted,234
222,1,1,600.0,500.0,Rejected,222
200,1,2,1000.0,1000.0,Pending,200


In [None]:
import pandas as pd
import xml.etree.ElementTree as ET
import io

def iter_docs(trades, tag='trade'):
    """Yield one flat dict per element named ``tag`` found under ``trades``.

    Each dict starts as a copy of the attributes of ``trades`` itself, is
    overlaid with the matching element's own attributes (the element's
    value wins when both define the same key), and finally stores the
    element's text under the key ``'data'``.

    Parameters
    ----------
    trades : xml.etree.ElementTree.Element
        Element whose subtree is searched.
    tag : str, optional
        Tag name to match. ElementTree compares tag names case-sensitively,
        so the default ``'trade'`` does NOT match ``<Trade>`` elements;
        pass ``tag='Trade'`` for documents that capitalise the tag.

    Yields
    ------
    dict
        Merged attributes plus the ``'data'`` entry for each match.
    """
    inherited = trades.attrib
    for element in trades.iter(tag):
        # Copy so the shared parent attributes are never mutated.
        record = dict(inherited)
        record.update(element.attrib)
        record['data'] = element.text
        yield record

# Wrap the XML text in an in-memory text buffer so ET.parse can read it like a file.
# NOTE(review): `xmlinput` is not defined in the cells visible here -- confirm where it comes from.
xml_data = io.StringIO(xmlinput)

# ET.parse returns an ElementTree; its root element is handed to iter_docs.
etree = ET.parse(xml_data)
# Despite the name, this is a plain list of dicts, not a DataFrame.
doc_df = [record for record in iter_docs(etree.getroot())]