# Pandas

## Series

In [1]:
import numpy as np 
import pandas as pd

In [9]:
# Sample inputs for building Series: a label list, a plain Python list,
# the same values as a NumPy array, and a label -> value dict.
labels = list('ABC')
l = [10, 20, 30]
arr = np.array(l)
d = dict(a=10, b=20, c=30)

In [6]:
# Series from a plain Python list; a default 0..n-1 integer index is generated.
pd.Series(data=l)

0    10
1    20
2    30
dtype: int64

In [7]:
# Series from a NumPy array; same result as building it from the list.
pd.Series(data=arr)

0    10
1    20
2    30
dtype: int64

In [111]:
# Series with explicit string labels A-D instead of the default integer index.
my_series = pd.Series([1, 2, 3, 4], index=list('ABCD'))

In [114]:
# Full slice: selects every element of the series.
my_series[:]

A    1
B    2
C    3
D    4
dtype: int64

In [11]:
# Mixed int / str / float values: the series falls back to dtype object.
pd.Series([1, 'a', 0.5])

0      1
1      a
2    0.5
dtype: object

In [130]:
# Two series whose labels only partly overlap (A and B are shared).
s1 = pd.Series([1, 2, 3], index=list('ABC'))
s2 = pd.Series([4, 5, 6, 7], index=list('ABDE'))
# Print both, separated by a blank line.
print(s1, '\n')
print(s2)

A    1
B    2
C    3
dtype: int64 

A    4
B    5
D    6
E    7
dtype: int64


In [131]:
# Label-aligned addition: labels present in only one series yield NaN,
# which is why the result is float64 rather than int64.
s1.add(s2)

A    5.0
B    7.0
C    NaN
D    NaN
E    NaN
dtype: float64

## DataFrame

In [151]:
# 3x3 frame holding the integers 1..9, rows labelled X-Z and columns A-C.
pd.DataFrame(
    np.arange(1, 10).reshape(3, 3),
    index=list('XYZ'),
    columns=list('ABC'),
)

Unnamed: 0,A,B,C
X,1,2,3
Y,4,5,6
Z,7,8,9


In [152]:
# Seeded generator so the 5x4 matrix of standard-normal draws is reproducible.
rng = np.random.default_rng(seed=42)
m = rng.standard_normal(size=(5, 4))
m

array([[ 0.30471708, -1.03998411,  0.7504512 ,  0.94056472],
       [-1.95103519, -1.30217951,  0.1278404 , -0.31624259],
       [-0.01680116, -0.85304393,  0.87939797,  0.77779194],
       [ 0.0660307 ,  1.12724121,  0.46750934, -0.85929246],
       [ 0.36875078, -0.9588826 ,  0.8784503 , -0.04992591]])

In [153]:
# Wrap the random matrix in a labelled frame: rows A-E, columns W-Z.
row_labels = list('ABCDE')
col_labels = list('WXYZ')
df = pd.DataFrame(m, index=row_labels, columns=col_labels)
df

Unnamed: 0,W,X,Y,Z
A,0.304717,-1.039984,0.750451,0.940565
B,-1.951035,-1.30218,0.12784,-0.316243
C,-0.016801,-0.853044,0.879398,0.777792
D,0.066031,1.127241,0.467509,-0.859292
E,0.368751,-0.958883,0.87845,-0.049926


In [188]:
# Derived column: element-wise sum of columns X and Y.
df['new_new_col'] = df['X'] + df['Y']

In [200]:
# Remove the helper columns again. No visible cell in this notebook creates
# 'new_col' (it can only exist in a kernel where a since-removed cell ran), so
# errors='ignore' keeps this cell from raising KeyError on a fresh
# Restart & Run All while still dropping both columns when they are present.
df.drop(columns=['new_col', 'new_new_col'], errors='ignore', inplace=True)

In [209]:
# Keep positive entries and replace everything else with NaN.
df.where(df > 0)

Unnamed: 0,W,X,Y,Z
A,0.304717,,0.750451,0.940565
B,,,0.12784,
C,,,0.879398,0.777792
D,0.066031,1.127241,0.467509,
E,0.368751,,0.87845,


In [219]:
# Overwrite a single cell: row 'A', column 'X'.
df.at['A', 'X'] = 1

In [237]:
# Boolean mask of positive entries; after masking, drop every row that
# contains at least one NaN, i.e. keep only rows where all columns are positive.
crit = df.gt(0)
df.where(crit).dropna(axis=0, how='any')

Unnamed: 0,W,X,Y,Z
A,0.304717,1.0,0.750451,0.940565


In [243]:
# Move the current row labels into a regular column (named 'index') and
# replace them with the default integer index. Mutates df in place.
df.reset_index(drop=False, inplace=True)

In [244]:
# Labels that will become the new row index, one per existing row.
new_index = ['AA', 'BB', 'CC', 'DD', 'EE']

In [245]:
# Attach the new labels as an ordinary column named 'new_index'.
df['new_index'] = new_index

In [246]:
# Display the frame: the old labels sit in the 'index' column, the new ones in 'new_index'.
df

Unnamed: 0,index,W,X,Y,Z,new_index
0,A,0.304717,1.0,0.750451,0.940565,AA
1,B,-1.951035,-1.30218,0.12784,-0.316243,BB
2,C,-0.016801,-0.853044,0.879398,0.777792,CC
3,D,0.066031,1.127241,0.467509,-0.859292,DD
4,E,0.368751,-0.958883,0.87845,-0.049926,EE


In [247]:
# Promote the 'new_index' column to the row index. Mutates df in place.
df.set_index(keys='new_index', inplace=True)

In [250]:
# Give the column holding the former row labels a descriptive name.
df.rename(columns={'index': 'old_index'}, inplace=True)

In [251]:
# Display the re-indexed frame; the former row labels are kept in 'old_index'.
df

Unnamed: 0_level_0,old_index,W,X,Y,Z
new_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AA,A,0.304717,1.0,0.750451,0.940565
BB,B,-1.951035,-1.30218,0.12784,-0.316243
CC,C,-0.016801,-0.853044,0.879398,0.777792
DD,D,0.066031,1.127241,0.467509,-0.859292
EE,E,0.368751,-0.958883,0.87845,-0.049926


In [255]:
# Re-create the same seeded 5x4 standard-normal matrix to start from a clean frame.
rng = np.random.default_rng(seed=42)
data = rng.standard_normal(size=(5, 4))
data

array([[ 0.30471708, -1.03998411,  0.7504512 ,  0.94056472],
       [-1.95103519, -1.30217951,  0.1278404 , -0.31624259],
       [-0.01680116, -0.85304393,  0.87939797,  0.77779194],
       [ 0.0660307 ,  1.12724121,  0.46750934, -0.85929246],
       [ 0.36875078, -0.9588826 ,  0.8784503 , -0.04992591]])

In [257]:
# Row and column labels for the fresh frame.
idx = ['A', 'B', 'C', 'D', 'E']
cols = ['W', 'X', 'Y', 'Z']
# Rebuild df from the random matrix with those labels.
df = pd.DataFrame(data, index=idx, columns=cols)
df

Unnamed: 0,W,X,Y,Z
A,0.304717,-1.039984,0.750451,0.940565
B,-1.951035,-1.30218,0.12784,-0.316243
C,-0.016801,-0.853044,0.879398,0.777792
D,0.066031,1.127241,0.467509,-0.859292
E,0.368751,-0.958883,0.87845,-0.049926


In [258]:
# Re-key the frame with new row labels while keeping the old ones as a column.
# Written as one method chain instead of successive inplace=True calls, so the
# cell rebinds `df` in a single step and each stage of the transformation is
# visible in order.
new_index = ['AA', 'BB', 'CC', 'DD', 'EE']
df = (
    df
    .reset_index()                            # old labels become a column named 'index'
    .rename(columns={'index': 'old_index'})   # give that column a descriptive name
    .assign(new_index=new_index)              # add the new labels as a column
    .set_index('new_index')                   # promote them to the row index
)
df

Unnamed: 0_level_0,old_index,W,X,Y,Z
new_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AA,A,0.304717,-1.039984,0.750451,0.940565
BB,B,-1.951035,-1.30218,0.12784,-0.316243
CC,C,-0.016801,-0.853044,0.879398,0.777792
DD,D,0.066031,1.127241,0.467509,-0.859292
EE,E,0.368751,-0.958883,0.87845,-0.049926


In [259]:
# Plain-text rendering of the frame via print(), in contrast to the rich table display above.
print(df)

          old_index         W         X         Y         Z
new_index                                                  
AA                A  0.304717 -1.039984  0.750451  0.940565
BB                B -1.951035 -1.302180  0.127840 -0.316243
CC                C -0.016801 -0.853044  0.879398  0.777792
DD                D  0.066031  1.127241  0.467509 -0.859292
EE                E  0.368751 -0.958883  0.878450 -0.049926


In [260]:
# Summarise the frame: index range, per-column non-null counts and dtypes, memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, AA to EE
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   old_index  5 non-null      object 
 1   W          5 non-null      float64
 2   X          5 non-null      float64
 3   Y          5 non-null      float64
 4   Z          5 non-null      float64
dtypes: float64(4), object(1)
memory usage: 240.0+ bytes


In [261]:
# Summary statistics of the numeric columns, transposed so that each
# column of df becomes one row of the report.
df.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
W,5.0,-0.245668,0.966727,-1.951035,-0.016801,0.066031,0.304717,0.368751
X,5.0,-0.60537,0.982686,-1.30218,-1.039984,-0.958883,-0.853044,1.127241
Y,5.0,0.62073,0.322712,0.12784,0.467509,0.750451,0.87845,0.879398
Z,5.0,0.098579,0.755301,-0.859292,-0.316243,-0.049926,0.777792,0.940565
