# Pandas

- [Create](#create)
- [Delete](#Delete)
- [Index](#index)
- [Sort](#sort)

In [1]:
import pandas as pd
import numpy as np

# Show the pandas version string of the installed library (cell output)
pd.__version__

'0.23.1'

<a id='create'></a>
## Create

In [3]:
# Start from a frame that has neither rows nor columns
df = pd.DataFrame(data=None)
df

In [9]:
# Load a frame from a CSV file (path is relative to the notebook's working directory)
csv_path = './data/file.csv'
df = pd.read_csv(csv_path)
df

Unnamed: 0,col1,col2
0,A,1
1,B,2
2,C,3


In [8]:
# Build a frame by placing Series side by side as columns
col1 = pd.Series(['A', 'B', 'C'])
col2 = pd.Series([1, 2, 3])

# `keys` labels each Series as it is concatenated, so no separate
# column-renaming step is needed afterwards.
df = pd.concat([col1, col2], axis=1, keys=['col1', 'col2'])

df

Unnamed: 0,col1,col2
0,A,1
1,B,2
2,C,3


In [20]:
# Build a frame from a mapping of column name -> list of values
columns_by_name = {
    'col1': ['A', 'B', 'C'],
    'col2': [1, 2, 3],
}
df = pd.DataFrame(columns_by_name)
df

Unnamed: 0,col1,col2
0,A,1
1,B,2
2,C,3


In [10]:
# Create from random values
# The samples come from a locally seeded generator so the table shown below is
# identical on every "Restart & Run All". A dedicated RandomState is used
# instead of np.random.seed() so NumPy's global random state is left untouched
# for the cells that follow.
rng = np.random.RandomState(0)
df = pd.DataFrame(rng.randn(5, 3), columns=['col1', 'col2', 'col3'])
df

Unnamed: 0,col1,col2,col3
0,-1.34124,0.434761,-1.599576
1,0.153011,-0.925636,-0.440415
2,-1.20412,2.96121,-1.434893
3,2.146288,0.613657,0.018767
4,1.970047,0.58253,-0.01789


<a id='delete'></a>
## Delete

In [41]:
# Drop column(s) by position: look up the labels at positions 0 and 2,
# then remove those labels
df = pd.DataFrame(np.random.randn(5, 3), columns=['col1', 'col2', 'col3'])

labels_to_drop = df.columns[[0, 2]]
df = df.drop(columns=labels_to_drop)
df

Unnamed: 0,col2
0,-0.989766
1,0.450066
2,2.666293
3,-0.156339
4,0.573351


In [42]:
# Drop column(s) by label
df = pd.DataFrame(np.random.randn(5, 3), columns=['col1', 'col2', 'col3'])

df = df.drop(columns=['col1', 'col3'])
df

Unnamed: 0,col2
0,0.702587
1,-0.681975
2,1.31634
3,0.87381
4,0.96907


<a id='index'></a>
## Index

In [37]:
# Promote columns to the index (they move to the left of the displayed table)
records = {
    'col1': [3, 1, 2],
    'col2': ['A', 'B', 'C'],
    'col3': [8, 5, 7],
    'col4': [True, False, True],
}
df = pd.DataFrame(records)

# Passing several labels produces a multi-level index, one level per column
df = df.set_index(keys=['col1', 'col3'])
df

Unnamed: 0_level_0,Unnamed: 1_level_0,col2,col4
col1,col3,Unnamed: 2_level_1,Unnamed: 3_level_1
3,8,A,True
1,5,B,False
2,7,C,True


In [39]:
# Demote an index level back to an ordinary column (it moves back to the right).
# Only the 'col1' level is reset here; any other index level stays in the index.
df = df.reset_index(level='col1')
df

Unnamed: 0_level_0,col1,col2,col4
col3,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
8,3,A,True
5,1,B,False
7,2,C,True


<a id='sort'></a>
## Sort

In [18]:
# Order rows by the values of one column (ascending is the default)
values = {
    'col1': [3, 1, 2],
    'col2': [1, 2, 3],
}
df = pd.DataFrame(values)

df.sort_values('col1')

Unnamed: 0,col1,col2
1,1,2
2,2,3
0,3,1


In [19]:
# Same sort with the order reversed: largest 'col1' value first
df.sort_values('col1', ascending=False)

Unnamed: 0,col1,col2
0,3,1
2,2,3
1,1,2


In [46]:
# Order rows by their index labels rather than by a column's values
unindexed = pd.DataFrame({'col1': [3, 1, 2],
                          'col2': [1, 2, 3]})
df = unindexed.set_index('col1')

df.sort_index()

Unnamed: 0_level_0,col2
col1,Unnamed: 1_level_1
1,2
2,3
3,1
