# DataFrames

## Overview

In [3]:
import pandas as pd

# Column-oriented input: every key becomes a column, every list its values.
dic = {
    "name": ["Livio", "Marco", "Lorenzo"],
    "children": [0, 1, 2],
}
df = pd.DataFrame(data=dic)
df

Unnamed: 0,name,children
0,Livio,0
1,Marco,1
2,Lorenzo,2


In [4]:
# Print a concise summary of the frame: index range, per-column non-null
# counts and dtypes, and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 2 columns):
name        3 non-null object
children    3 non-null int64
dtypes: int64(1), object(1)
memory usage: 128.0+ bytes


In [5]:
# The column labels, returned as a pandas Index.
df.columns

Index(['name', 'children'], dtype='object')

## Column Extraction

In [7]:
# Attribute-style access: returns the "name" column as a Series.
df.name

0      Livio
1      Marco
2    Lorenzo
Name: name, dtype: object

In [8]:
# Bracket-style access: returns the "children" column as a Series.
df["children"]

0    0
1    1
2    2
Name: children, dtype: int64

In [9]:
# Summary statistics (count, mean, std, min, quartiles, max). Only the numeric
# column "children" shows up here; the text column "name" is left out.
df.describe()

Unnamed: 0,children
count,3.0
mean,1.0
std,1.0
min,0.0
25%,0.5
50%,1.0
75%,1.5
max,2.0


In [12]:
import pandas as pd

# Same people, plus a second numeric column so describe() reports two columns.
dic = {
    "name": ["Livio", "Marco", "Lorenzo"],
    "children": [0, 1, 2],
    "free_time": [1000, 100, 10],
}
df = pd.DataFrame(data=dic)
df.describe()

Unnamed: 0,children,free_time
count,3.0,3.0
mean,1.0,370.0
std,1.0,547.448628
min,0.0,10.0
25%,0.5,55.0
50%,1.0,100.0
75%,1.5,550.0
max,2.0,1000.0


In [6]:
import pandas as pd

dic = {
    "name": ["Livio", "Marco", "Lorenzo"],
    "children": [0, 1, 2],
}
df = pd.DataFrame(data=dic)
# Assigning a list to a new key appends a column, matched to rows by position.
df["month"] = ["Sept", "Oct", "Dec"]
df

Unnamed: 0,name,children,month
0,Livio,0,Sept
1,Marco,1,Oct
2,Lorenzo,2,Dec


In [9]:
# Assigning a Series aligns on index labels, not on position: df has the row
# labels 0, 1, 2, so only the value stored at label 2 ('Dec') is kept. Labels 4
# and -1 match no row, and rows 0 and 1 end up with a missing month.
df['month'] = pd.Series(['Sept', 'Oct', 'Dec'], index=[4,-1,2])
df

Unnamed: 0,name,children,month
0,Livio,0,
1,Marco,1,
2,Lorenzo,2,Dec


In [10]:
import pandas as pd

dic = {
    "name": ["Livio", "Marco", "Lorenzo"],
    "children": [0, 1, 2],
}
df = pd.DataFrame(data=dic)
# A scalar is broadcast: every row receives the same value.
df["year"] = "1981"
df

Unnamed: 0,name,children,year
0,Livio,0,1981
1,Marco,1,1981
2,Lorenzo,2,1981


In [19]:
import pandas as pd

dic = {
    "name": ["Livio", "Marco", "Lorenzo"],
    "children": [0, 1, 2],
    "year": [1981, 1981, 1981],
    "month": ["Sept", "Oct", "Dec"],
}
df = pd.DataFrame(data=dic)
# A list of labels selects several columns, in the order listed.
df.loc[:, ["month", "year"]]

Unnamed: 0,month,year
0,Sept,1981
1,Oct,1981
2,Dec,1981


In [23]:
import pandas as pd

dic = {
    "name": ["Livio", "Marco", "Lorenzo"],
    "children": [0, 1, 2],
    "year": [1981, 1981, 1981],
    "month": ["Sept", "Oct", "Dec"],
}
df = pd.DataFrame(data=dic)
# pop() removes the column from df and returns it as a Series.
df.pop("month")

0    Sept
1     Oct
2     Dec
Name: month, dtype: object

In [26]:
import pandas as pd

# Row-oriented input: one dict per row. The variable is called `records`
# rather than `list` so the built-in list type is not shadowed for the rest of
# the kernel session.
records = [
    {"name": "Livio", "children": 0},
    {"name": "Marco", "children": 1},
    # Only this row has a "year"; the other rows get a missing value there.
    {"name": "Lorenzo", "children": 2, "year": "1981"},
]
df = pd.DataFrame(records)
df

Unnamed: 0,children,name,year
0,0,Livio,
1,1,Marco,
2,2,Lorenzo,1981.0


In [39]:
import json

dic = {
    "name": ["Livio", "Marco", "Lorenzo"],
    "children": [0, 1, 2],
    "year": [1981, 1981, 1981],
    "month": ["Sept", "Oct", "Dec"],
}
# Serialise the dict to JSON text; the result is a plain str.
json_col = json.dumps(dic)
print(type(json_col))
json_col


<class 'str'>


'{"name": ["Livio", "Marco", "Lorenzo"], "children": [0, 1, 2], "year": [1981, 1981, 1981], "month": ["Sept", "Oct", "Dec"]}'

In [40]:
from io import StringIO

json_col = """{ "name": ["Livio", "Marco", "Lorenzo"], 
    "children": [0, 1, 2],  "year": [1981, 1981, 1981], 
    "month": ["Sept", "Oct", "Dec"]}"""
# read_json takes a path or a file-like object. Handing it the raw JSON text
# directly is deprecated in recent pandas releases, so the text is wrapped in
# an in-memory StringIO buffer.
df = pd.read_json(StringIO(json_col))
df

Unnamed: 0,name,children,year,month
0,Livio,0,1981,Sept
1,Marco,1,1981,Oct
2,Lorenzo,2,1981,Dec


In [42]:
# Load the frame from a CSV file; the path is relative to the current working
# directory, so the course_resources folder must sit next to where the kernel runs.
df = pd.read_csv("./course_resources/LML_Dataframe.csv")
df

Unnamed: 0,name,children
0,livio,0
1,marco,1
2,lorenzo,2


# Algebra

## Selection

In [2]:
import pandas as pd

dic = {
    "name": ["Livio", "Marco", "Lorenzo"],
    "children": [0, 1, 2],
    "year": [1981, 1981, 1981],
    "month": ["Sept", "Oct", "Dec"],
}
df = pd.DataFrame(data=dic)
# Boolean mask: keep only the rows where the comparison is True.
df.loc[df["name"] == "Livio"]

Unnamed: 0,name,children,year,month
0,Livio,0,1981,Sept


In [45]:
# Rows whose name contains a capital "L".
df.loc[df["name"].str.contains("L")]

Unnamed: 0,name,children,year,month
0,Livio,0,1981,Sept
2,Lorenzo,2,1981,Dec


In [47]:
# Two row conditions combined with & (element-wise AND); the comparison needs
# its own parentheses because & binds tighter than >.
df.loc[df["name"].str.contains("L") & (df["children"] > 0)]

Unnamed: 0,name,children,year,month
2,Lorenzo,2,1981,Dec


In [4]:
# NOTE(review): on current pandas, numexpr looks to be an optional accelerator
# for query() rather than a hard requirement (pandas falls back to its Python
# engine when numexpr is not installed) - confirm for the pandas version used
# in this course.
df.query('name == "Livio" and children < 3')

Unnamed: 0,name,children,year,month
0,Livio,0,1981,Sept


In [2]:
import pandas as pd

dic = {
    "name": ["Livio", "Marco", "Lorenzo"],
    "children": [0, 1, 2],
    "year": [1981, 1981, 1981],
    "month": ["Sept", "Oct", "Dec"],
}
df = pd.DataFrame(data=dic)
# First two rows of the frame.
df.head(n=2)

Unnamed: 0,name,children,year,month
0,Livio,0,1981,Sept
1,Marco,1,1981,Oct


In [3]:
# Last two rows of the frame.
df.tail(2)

Unnamed: 0,name,children,year,month
1,Marco,1,1981,Oct
2,Lorenzo,2,1981,Dec


In [6]:
# iloc selects rows by integer position; they come back in the order requested
# (position 2 first, then position 0).
df.iloc[[2, 0]]

Unnamed: 0,name,children,year,month
2,Lorenzo,2,1981,Dec
0,Livio,0,1981,Sept


In [12]:
# Positional slice counted from the end: the last two rows.
df.iloc[-2:]

Unnamed: 0,name,children,year,month
1,Marco,1,1981,Oct
2,Lorenzo,2,1981,Dec


## Projection

In [20]:
import pandas as pd

dic = {
    "name": ["Livio", "Marco", "Lorenzo"],
    "children": [0, 1, 2],
    "year": [1981, 1981, 1981],
    "month": ["Sept", "Oct", "Dec"],
}
df = pd.DataFrame(data=dic)
# All rows, the last 2 columns.
df.iloc[:, -2:]

Unnamed: 0,year,month
0,1981,Sept
1,1981,Oct
2,1981,Dec


In [22]:
import pandas as pd

dic = {
    "name": ["Livio", "Marco", "Lorenzo"],
    "children": [0, 1, 2],
    "year": [1981, 1981, 1981],
    "month": ["Sept", "Oct", "Dec"],
}
df = pd.DataFrame(data=dic)
# Selection and projection in one step: row mask first, column labels second.
df.loc[df["children"] > 0, ["name", "year"]]

Unnamed: 0,name,year
1,Marco,1981
2,Lorenzo,1981


Unnamed: 0,name,year
1,Marco,1981
2,Lorenzo,1981


## Product

In [27]:
import pandas as pd

dic = {
    "name": ["Livio", "Marco", "Lorenzo"],
    "children": [0, 1, 2],
    "month": ["Sept", "Oct", "Dec"],
}
df = pd.DataFrame(data=dic)
role = pd.DataFrame(
    [
        {"name": "Livio", "class": "master"},
        {"name": "Marco", "class": "cleric"},
    ]
)
# Inner join on the only shared column, "name": Lorenzo has no role and is dropped.
df.merge(role, how="inner", on="name")

Unnamed: 0,name,children,month,class
0,Livio,0,Sept,master
1,Marco,1,Oct,cleric


In [26]:
# how='left' keeps every row of df; Lorenzo has no entry in role, so his
# "class" value is missing in the result.
df.merge(role, how='left')

Unnamed: 0,name,children,month,class
0,Livio,0,Sept,master
1,Marco,1,Oct,cleric
2,Lorenzo,2,Dec,
