In [3]:
import pandas as pd
import numpy as np

# Pandas Series

In [4]:
# Each pair is (index label, value); the two parallel lists used to build the
# Series are derived from it.
label_value_pairs = [('USA', 1776), ('Canada', 1867), ('Mexico', 1821)]
myindex = [label for label, _value in label_value_pairs]
mydata = [value for _label, value in label_value_pairs]

In [5]:
# Pair each label in myindex with the value at the same position in mydata.
myser = pd.Series(mydata, index=myindex)

In [6]:
myser

USA       1776
Canada    1867
Mexico    1821
dtype: int64

In [7]:
# Positional lookup of the first element. .iloc states the intent explicitly;
# plain myser[0] on a string-labelled Series relies on the integer-as-position
# fallback that pandas has deprecated.
myser.iloc[0]

1776

In [8]:
# Label-based lookup of the same element.
myser.loc['USA']

1776

In [9]:
# Name -> age mapping, built with keyword arguments.
ages = dict(Sam=10, Frank=11, Spike=12)
# A dict converts directly: its keys become the index and its values the data.
pd.Series(data=ages)

Sam      10
Frank    11
Spike    12
dtype: int64

In [10]:
# Two plain dicts keyed by country; 'England' is the only key present in both.
q1 = dict(Japan=100, USA=200, England=150)
q2 = dict(Brazil=150, Vietnam=150, England=100)

In [11]:
# Turn each dictionary into a Series (the dict keys become the index labels).
sale_q1, sale_q2 = (pd.Series(quarter) for quarter in (q1, q2))

In [12]:
sale_q1

Japan      100
USA        200
England    150
dtype: int64

In [13]:
sale_q2

Brazil     150
Vietnam    150
England    100
dtype: int64

In [14]:
# The labels of the Series (what Series.keys() returns).
sale_q1.index

Index(['Japan', 'USA', 'England'], dtype='object')

In [15]:
# Labels are aligned before adding. A label missing from either Series yields
# NaN, so only 'England' (present in both) gets a numeric sum, and the result
# is float64.
sale_q1 + sale_q2

Brazil       NaN
England    250.0
Japan        NaN
USA          NaN
Vietnam      NaN
dtype: float64

In [16]:
# Same label alignment as `+`, but a label absent from one Series is treated
# as 0 instead of turning the sum into NaN.
total_sale = sale_q1.add(other=sale_q2, fill_value=0)

In [17]:
total_sale

Brazil     150.0
England    250.0
Japan      100.0
USA        200.0
Vietnam    150.0
dtype: float64

In [18]:
sale_q1.dtype

dtype('int64')

In [19]:
total_sale.dtype

dtype('float64')

In [20]:
# Convert total_sale from float64 back to int64.
# astype is the dedicated, vectorised dtype conversion; apply(np.int64) reached
# the same result by calling the np.int64 constructor on each element.
total_sale = total_sale.astype(np.int64)

In [21]:
total_sale.dtype

dtype('int64')

In [22]:
total_sale

Brazil     150
England    250
Japan      100
USA        200
Vietnam    150
dtype: int64

In [23]:
# astype returns a new Series. The result is only displayed, not assigned, so
# total_sale itself keeps its int64 dtype (checked in the next cell).
total_sale.astype(float)

Brazil     150.0
England    250.0
Japan      100.0
USA        200.0
Vietnam    150.0
dtype: float64

In [24]:
total_sale.dtype

dtype('int64')

In [25]:
# Element-wise exponential: one float64 result per label, same index.
np.exp(total_sale)

Brazil      1.393710e+65
England    3.746455e+108
Japan       2.688117e+43
USA         7.225974e+86
Vietnam     1.393710e+65
dtype: float64

In [26]:
# Product of all values, reduced to a single scalar.
# Series.prod() is the direct reduction; routing np.prod through aggregate()
# triggers a FutureWarning on recent pandas releases, which currently swap the
# NumPy callable for the Series method behind the scenes.
total_sale.prod()

112500000000

# DataFrames

In [27]:
# Two column-oriented dicts: each key is a column label and each cell is that
# label followed by a 1-based row number ('A1' ... 'A4').
data_one = {col: [f'{col}{row}' for row in range(1, 5)] for col in ('A', 'B')}
data_two = {col: [f'{col}{row}' for row in range(1, 5)] for col in ('C', 'D')}

In [28]:
data_one

{'A': ['A1', 'A2', 'A3', 'A4'], 'B': ['B1', 'B2', 'B3', 'B4']}

In [29]:
data_two

{'C': ['C1', 'C2', 'C3', 'C4'], 'D': ['D1', 'D2', 'D3', 'D4']}

In [30]:
# Dict of equal-length lists -> DataFrame with one column per key.
one = pd.DataFrame(data_one)

In [31]:
# Dict of equal-length lists -> DataFrame with one column per key.
two = pd.DataFrame(data_two)

In [32]:
one

Unnamed: 0,A,B
0,A1,B1
1,A2,B2
2,A3,B3
3,A4,B4


In [33]:
two

Unnamed: 0,C,D
0,C1,D1
1,C2,D2
2,C3,D3
3,C4,D4


In [34]:
# Stack `two` underneath `one` (row-wise). The frames share no column names, so
# the cells with no source value are empty and the row labels 0-3 appear twice.
pd.concat([one, two], axis='index')

Unnamed: 0,A,B,C,D
0,A1,B1,,
1,A2,B2,,
2,A3,B3,,
3,A4,B4,,
0,,,C1,D1
1,,,C2,D2
2,,,C3,D3
3,,,C4,D4


In [35]:
# Place `two` to the right of `one`, matching rows by their index labels.
mydf = pd.concat([one, two], axis='columns')

In [36]:
mydf

Unnamed: 0,A,B,C,D
0,A1,B1,C1,D1
1,A2,B2,C2,D2
2,A3,B3,C3,D3
3,A4,B4,C4,D4


In [37]:
# Give the frame a fresh 0..n-1 index. reset_index(drop=True) returns a new
# frame rather than overwriting the index attribute of the existing one, and it
# needs no manual range(len(...)) bookkeeping.
mydf = mydf.reset_index(drop=True)

In [38]:
mydf

Unnamed: 0,A,B,C,D
0,A1,B1,C1,D1
1,A2,B2,C2,D2
2,A3,B3,C3,D3
3,A4,B4,C4,D4


In [39]:
# Two tables collected after the conference.
# The reg_id / log_id columns record the order in which people registered or
# logged in on site.
# Names are assumed to be unique.
# The registration names start with A, B, C, D, in that order.
registrations = pd.DataFrame({
    'reg_id': [1, 2, 3, 4],
    'name': ['Andrew', 'Bobo', 'Claire', 'David'],
})
logins = pd.DataFrame({
    'log_id': [1, 2, 3, 4],
    'name': ['Xavier', 'Andrew', 'Yolanda', 'Bobo'],
})

In [40]:
registrations

Unnamed: 0,reg_id,name
0,1,Andrew
1,2,Bobo
2,3,Claire
3,4,David


In [41]:
logins

Unnamed: 0,log_id,name
0,1,Xavier
1,2,Andrew
2,3,Yolanda
3,4,Bobo


In [42]:
# Inner join on 'name': keep only the people found in both tables.
pops = registrations.merge(logins, how='inner', on='name')

In [43]:
pops

Unnamed: 0,reg_id,name,log_id
0,1,Andrew,2
1,2,Bobo,4


In [44]:
# Left join: every registration is kept; log_id is missing for names that never
# logged in.
registrations.merge(logins, how='left', on='name')

Unnamed: 0,reg_id,name,log_id
0,1,Andrew,2.0
1,2,Bobo,4.0
2,3,Claire,
3,4,David,


In [45]:
# Right join: every login is kept; reg_id is missing for names that never
# registered.
registrations.merge(logins, how='right', on='name')

Unnamed: 0,reg_id,name,log_id
0,,Xavier,1
1,1.0,Andrew,2
2,,Yolanda,3
3,2.0,Bobo,4


In [46]:
# Outer join: the union of both tables, with gaps wherever a name appears on
# one side only.
registrations.merge(logins, how='outer', on='name')

Unnamed: 0,reg_id,name,log_id
0,1.0,Andrew,2.0
1,2.0,Bobo,4.0
2,3.0,Claire,
3,4.0,David,
4,,Xavier,1.0
5,,Yolanda,3.0


In [47]:
# Move 'name' out of the columns and into the index. The variable is rebound,
# so from here on `registrations` has 'name' as its index, not as a column.
registrations = registrations.set_index('name')

In [48]:
registrations

Unnamed: 0_level_0,reg_id
name,Unnamed: 1_level_1
Andrew,1
Bobo,2
Claire,3
David,4


In [49]:
# Join where the left table keeps its names in the index and the right table
# keeps them in its 'name' column.
registrations.merge(logins, left_index=True, right_on='name')

Unnamed: 0,reg_id,log_id,name
1,1,2,Andrew
3,2,4,Bobo


In [50]:
# Turn the 'name' index back into an ordinary column; it comes back as the
# first column, ahead of reg_id.
registrations = registrations.reset_index()

In [51]:
registrations

Unnamed: 0,name,reg_id
0,Andrew,1
1,Bobo,2
2,Claire,3
3,David,4


In [52]:
logins['name'].dtype

dtype('O')

In [53]:
# Rename 'name' to 'reg_name' so the two tables no longer share a key column name.
# Renaming by label does not depend on the current column order, unlike
# assigning a positional list to .columns, and it returns a new frame.
registrations = registrations.rename(columns={'name': 'reg_name'})

In [54]:
registrations.columns

Index(['reg_name', 'reg_id'], dtype='object')

In [55]:
registrations

Unnamed: 0,reg_name,reg_id
0,Andrew,1
1,Bobo,2
2,Claire,3
3,David,4


In [56]:
# Inner join when the key column is named differently on each side; both key
# columns ('reg_name' and 'name') are kept in the result.
registrations.merge(logins, how='inner', left_on='reg_name', right_on='name')

Unnamed: 0,reg_name,reg_id,log_id,name
0,Andrew,1,2,Andrew
1,Bobo,2,4,Bobo


In [57]:
# Same inner join, then drop the redundant 'reg_name' key column.
(
    registrations
    .merge(logins, how='inner', left_on='reg_name', right_on='name')
    .drop(columns='reg_name')
)

Unnamed: 0,reg_id,log_id,name
0,1,2,Andrew
1,2,4,Bobo


In [58]:
# Rename the columns to the generic names 'name' and 'id'.
# Mapping old label -> new label keeps the rename independent of column order,
# unlike assigning a positional list to .columns.
registrations = registrations.rename(columns={'reg_name': 'name', 'reg_id': 'id'})

In [59]:
registrations

Unnamed: 0,name,id
0,Andrew,1
1,Bobo,2
2,Claire,3
3,David,4


In [60]:
# Rename 'log_id' to 'id' so both tables now share the column names 'id' and 'name'.
# Renaming by label leaves 'name' untouched and does not depend on column order.
logins = logins.rename(columns={'log_id': 'id'})

In [61]:
logins

Unnamed: 0,id,name
0,1,Xavier
1,2,Andrew
2,3,Yolanda
3,4,Bobo


In [62]:
pd.merge

<function pandas.core.reshape.merge.merge(left: 'DataFrame | Series', right: 'DataFrame | Series', how: 'str' = 'inner', on: 'IndexLabel | None' = None, left_on: 'IndexLabel | None' = None, right_on: 'IndexLabel | None' = None, left_index: 'bool' = False, right_index: 'bool' = False, sort: 'bool' = False, suffixes: 'Suffixes' = ('_x', '_y'), copy: 'bool' = True, indicator: 'bool' = False, validate: 'str | None' = None) -> 'DataFrame'>

In [63]:
# (Stray input removed: this cell only evaluated the undefined name `haha`,
# which raised NameError and stopped a top-to-bottom run of the notebook.)

In [64]:
registrations

Unnamed: 0,name,id
0,Andrew,1
1,Bobo,2
2,Claire,3
3,David,4


In [65]:
logins

Unnamed: 0,id,name
0,1,Xavier
1,2,Andrew
2,3,Yolanda
3,4,Bobo


In [66]:
logins

Unnamed: 0,id,name
0,1,Xavier
1,2,Andrew
2,3,Yolanda
3,4,Bobo


In [67]:
registrations

Unnamed: 0,name,id
0,Andrew,1
1,Bobo,2
2,Claire,3
3,David,4


In [68]:
# (Stray non-code text removed: it was not valid Python and raised SyntaxError,
# which stopped a top-to-bottom run of the notebook.)