# 08A: Pandas

Demonstrating common Pandas functionality

In [1]:
import pandas as pd

In [2]:
# creating a Pandas dataframe
data = {'color':['red','green','blue'],'rating':[1,2,3]}

df = pd.DataFrame(data)
print(df)
print("")

# to return a row in the df, use .loc and specify the index position
print(df.loc[0])

   color  rating
0    red       1
1  green       2
2   blue       3

color     red
rating      1
Name: 0, dtype: object


In [3]:
# create an empty dataframe
df = pd.DataFrame()

# with just columns
df = pd.DataFrame(columns=['A','B','C','D','E'])
print(df)
print("")

# with just an index
df = pd.DataFrame(index=range(5))
print(df)
print("")

# take a copy and drop all rows
new_df = df[0:0]   # takes the structure of df but with zero rows
print(new_df)

# append data
data = [1,2,3,4,5]
df.append(data)
print(df)

Empty DataFrame
Columns: [A, B, C, D, E]
Index: []

Empty DataFrame
Columns: []
Index: [0, 1, 2, 3, 4]

Empty DataFrame
Columns: []
Index: []
Empty DataFrame
Columns: []
Index: [0, 1, 2, 3, 4]


In [4]:
# reading a JSON string (additional)

# object has same format as a Python dictionary, with nesting
data = '{"colour":{"0":"red", "1":"green", "2":"blue"}, "rating": {"0":1,"1":2,"2":3}}'
df = pd.read_json(data)
print(df)

  colour  rating
0    red       1
1  green       2
2   blue       3


In [5]:
# using the JSON package (additional)
import requests
from pandas.io.json import json_normalize
import pandas as pd

url = "https://api.exchangerate-api.com/v4/latest/USD"
df = pd.read_json(url)
print(df)

                             provider  \
AED  https://www.exchangerate-api.com   
AFN  https://www.exchangerate-api.com   
ALL  https://www.exchangerate-api.com   
AMD  https://www.exchangerate-api.com   
ANG  https://www.exchangerate-api.com   
..                                ...   
XPF  https://www.exchangerate-api.com   
YER  https://www.exchangerate-api.com   
ZAR  https://www.exchangerate-api.com   
ZMW  https://www.exchangerate-api.com   
ZWL  https://www.exchangerate-api.com   

AED  https://www.exchangerate-api.com/docs/free   
AFN  https://www.exchangerate-api.com/docs/free   
ALL  https://www.exchangerate-api.com/docs/free   
AMD  https://www.exchangerate-api.com/docs/free   
ANG  https://www.exchangerate-api.com/docs/free   
..                                          ...   
XPF  https://www.exchangerate-api.com/docs/free   
YER  https://www.exchangerate-api.com/docs/free   
ZAR  https://www.exchangerate-api.com/docs/free   
ZMW  https://www.exchangerate-api.com/docs/free 

In [6]:
# write to csv file
data = [['Alise',32],['Bob',26],['Charlie',45]]
df = pd.DataFrame(data,columns=['Name','Age'])

df.to_csv('my_data.csv')

df.to_csv('my_data.csv',sep='\t', encoding='utf-8', index=False)

## Inspecting dataframes

First step should be to get a quick overview of the dataframe

In [7]:
# loading in simplified set of stats for London, as of 2013
df = pd.read_csv('LondonStats2013.csv')
print(df.head(3))
print(df.dtypes)
print(df.info)
print(df.describe)
print(df[0:3])

                        WardName   Wardcode  Pop2013  Aged0_15  Aged16_64  \
0                 City of London  E09000001     8000       600       6200   
1   Barking and Dagenham - Abbey  E05000026    13650      3450       9550   
2  Barking and Dagenham - Alibon  E05000027    10400      2700       6600   

   Aged65plus  PctBame  PctNotBornUK  MeanHousePrice2013  PctOpenSpace  ...  \
0        1200     21.4          36.7              595000       18.6425  ...   
1         700     71.9          57.3              164000       19.6072  ...   
2        1100     29.9          24.7              173500       22.4129  ...   

   AvgPubTransAccessibility11  CrimeRate1213  PopDensityKm2013  \
0                         7.6     730.789474       2538.062371   
1                         5.7     164.400000      10500.000000   
2                         3.2      83.900000       7428.600000   

   PctWithNoQual11  PctLev4Qual11  TurnoutMayoralElection12  Candidate  \
0              6.7           68.4  

In [8]:
# standard descriptive statistics that can be applied to any column (additional)
print(df['PctOpenSpace'].count())
print(df['PctOpenSpace'].sum())
print(df['PctOpenSpace'].mean())
print(df['PctOpenSpace'].min())
print(df['PctOpenSpace'].max())

625
16953.541185
27.125665895999997
0.0
88.8026
