In [1]:
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html
import pandas as pd
import numpy as np

# Demo frame: the integers 1..9 laid out row by row under columns a, b, c.
df = pd.DataFrame(
    data=np.arange(1, 10).reshape(3, 3),
    columns=list('abc'),
)

# Column labels (a bare expression in the middle of a cell is evaluated
# but not displayed).
df.columns

# Row labels: the default 0..n-1 integer index.
df.index

# Only this final expression is rendered as the cell's output.
df

Unnamed: 0,a,b,c
0,1,2,3
1,4,5,6
2,7,8,9


In [4]:
# Row holding the largest value in column 'a'.
# idxmax() already scans the whole column, so no prior sort is needed; on
# ties it returns the first occurrence in the frame's own row order.
df.loc[df['a'].idxmax()]

a    7
b    8
c    9
Name: 2, dtype: int32

In [5]:
# Column subset: indexing with a list of labels yields a DataFrame holding
# just those columns, in the order listed.
df[['a', 'b']]

Unnamed: 0,a,b
0,1,2
1,4,5
2,7,8


In [6]:
# Boolean row filter: keep the rows where a > 3 and, at the same time, b < 8.
df[df['a'].gt(3) & df['b'].lt(8)]

Unnamed: 0,a,b,c
1,4,5,6


In [10]:
# Replace column 'b' with booleans.
# assign() returns a new frame and leaves `df` itself untouched, so cells
# further down that expect the original integers in 'b' still work when the
# notebook is run top to bottom. As the last expression, the result is shown.
df.assign(b=[True, False, False])


Unnamed: 0,a,b,c
0,1,True,3
1,4,False,6
2,7,False,9


In [21]:
# Replace column 'a' with booleans derived from a condition (here: c > 3).
# assign() builds a new frame instead of overwriting `df`, so later cells
# that do arithmetic/comparisons on the numeric 'a' keep working.
# Further conditions can be and-ed in with `&`, each wrapped in parentheses.
df.assign(a=df['c'] > 3)

Unnamed: 0,a,b,c
0,False,True,3
1,True,False,6
2,True,False,9


In [8]:
# Collect one [column_label, column_Series] pair per column of df.
# DataFrame.items() iterates column-wise, so the second element of each pair
# is a whole pandas Series, not a scalar.
arr = [[label, content] for label, content in df.items()]

# First pair: the label and Series of the first column.
a = arr[0]


[['a',
  0    1
  1    4
  2    7
  Name: a, dtype: int32],
 ['b',
  0    2
  1    5
  2    8
  Name: b, dtype: int32],
 ['c',
  0    3
  1    6
  2    9
  Name: c, dtype: int32]]

In [22]:
# Walk the row labels of df, printing one per line.
for row_label in df.index:
    print(row_label)

0
1
2


In [24]:
# Iterating a DataFrame directly yields its column labels; print one per line.
for col_label in df:
    print(col_label)

a
b
c


In [44]:
def prepare_xyz(df):
    """Flatten a DataFrame into three parallel lists, one entry per cell.

    Cells are visited row by row and, within each row, column by column.
    The output layout is intended for 3D bar plotting.

    Args:
        df: pandas DataFrame to flatten.

    Returns:
        Tuple ``(x, y, z)`` of equal-length lists where, for every cell,
        ``x`` holds the column label, ``y`` the row (index) label and ``z``
        the cell value.
    """
    x, y, z = [], [], []

    for row_pos, day in enumerate(df.index):
        for col_pos, exp in enumerate(df.columns):
            x.append(exp)
            y.append(day)
            # Positional lookup: the label-based df.loc[day, exp] returns a
            # Series instead of a scalar when a row or column label repeats.
            z.append(df.iat[row_pos, col_pos])

    return x, y, z

# Flatten the demo frame; the returned (x, y, z) tuple is the cell's output.
prepare_xyz(df)

(['a', 'b', 'c', 'a', 'b', 'c', 'a', 'b', 'c'],
 [0, 0, 0, 1, 1, 1, 2, 2, 2],
 [1, 2, 3, 4, 5, 6, 7, 8, 9])

In [50]:
# Save to csv.
# A path relative to the notebook's working directory keeps this cell
# runnable on any machine/OS (the absolute D:\ path only worked on one
# Windows box). The variable name is kept because the read-back cell uses it.
win_filename = 'test.csv'
df.to_csv(win_filename)  # the row index is written out as the first column
df

Unnamed: 0,a,b,c
0,1,2,3
1,4,5,6
2,7,8,9


In [55]:
# Read the csv back in, using its first column as the row index.
# Docs: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html
pd.read_csv(filepath_or_buffer=win_filename, index_col=0)

Unnamed: 0,a,b,c
0,1,2,3
1,4,5,6
2,7,8,9


In [56]:
# Stack two frames with identical column labels on top of each other.
# ignore_index=True discards both original indexes and renumbers the rows.
# NOTE(review): the recorded output still shows row labels 0,1,2,0,1,2 - it
# looks like it predates ignore_index=True; re-run the cell to refresh it.
df2 = pd.DataFrame(data=np.arange(9).reshape(3, 3), columns=list('abc'))
pd.concat(objs=[df, df2], ignore_index=True)  # axis=0 (row-wise) is the default

Unnamed: 0,a,b,c
0,1,2,3
1,4,5,6
2,7,8,9
0,0,1,2
1,3,4,5
2,6,7,8


In [60]:
# Concatenate frames whose column labels do not match: df2 is built without
# column names, so it gets the default integer labels 0, 1, 2. The result
# carries the union of both column sets, with NaN wherever a frame has no
# value for a column.
df2 = pd.DataFrame(data=np.arange(9).reshape(3, 3))
newdf = pd.concat(objs=[df, df2], ignore_index=True)  # row-wise by default

newdf

Unnamed: 0,a,b,c,0,1,2
0,1.0,2.0,3.0,,,
1,4.0,5.0,6.0,,,
2,7.0,8.0,9.0,,,
3,,,,0.0,1.0,2.0
4,,,,3.0,4.0,5.0
5,,,,6.0,7.0,8.0


In [62]:
# Concatenate frames with different column names: columns missing from one
# frame are filled with NaN for that frame's rows.
# NOTE(review): the recorded output has d = 10, 11, 12 in the first three
# rows, so df apparently already had a 'd' column when this cell was run
# (that column is added in a cell further down) - confirm the cell order.
df3 = pd.DataFrame(data=np.arange(9).reshape(3, 3), columns=list('def'))
newdf = pd.concat(objs=[df, df3], ignore_index=True)  # row-wise by default

newdf


Unnamed: 0,a,b,c,d,e,f
0,1.0,2.0,3.0,10,,
1,4.0,5.0,6.0,11,,
2,7.0,8.0,9.0,12,,
3,,,,0,1.0,2.0
4,,,,3,4.0,5.0
5,,,,6,7.0,8.0


In [61]:
# Add (or overwrite) column 'd' on df; later cells rely on it being present.
df = df.assign(d=[10, 11, 12])
df

Unnamed: 0,a,b,c,d
0,1,2,3,10
1,4,5,6,11
2,7,8,9,12


In [8]:
# shift: values of 'a' on the rows whose *previous* row had a <= 4.
# shift(1) moves the column down by one row; the first row becomes NaN and
# therefore never passes the comparison.
# Docs: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.shift.html
# Pattern taken from Aroussi's strategy notebook:
# https://github.com/ranaroussi/futuresio-webinars/blob/master/01-prototyping-trading-strategies.ipynb
df.loc[df['a'].shift(1) <= 4, 'a']

1    4
2    7
Name: a, dtype: int64

In [18]:
# dropna
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.dropna.html?highlight=dropna#pandas.DataFrame.dropna
# Put some missing values into df first: NaN in 'd', NaT in 'e'.
df = df.assign(d=[np.nan, 11, 12], e=[13, 14, pd.NaT])

# Defaults spelled out: drop every row (axis=0) that contains any missing
# value. dropna returns a new frame; df itself is only overwritten when
# inplace=True is passed.
df.dropna(axis=0, how='any')

# Only look for missing values in these columns.
df.dropna(subset=['a', 'e'])

# thresh counts NON-missing values: keep rows that have at least 2 of them.
df.dropna(thresh=2)

# Drop a row only if all of its values are missing ('any' is the default).
df.dropna(how='all')

# Drop columns (rather than rows) that contain a missing value.
# As the last expression, this is the result the cell displays.
df.dropna(axis='columns')

Unnamed: 0,a,b,c
0,1,2,3
1,4,5,6
2,7,8,9


In [19]:
# df.drop: return a copy of df without columns 'a' and 'c' (df is unchanged).
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.drop.html
df.drop(columns=['a', 'c'])

Unnamed: 0,b,d,e
0,2,,13
1,5,11.0,14
2,8,12.0,NaT


In [23]:
# Show rows where column 'd' is not missing.
# Series.notna() works for every dtype, whereas np.isnan raises TypeError on
# object/datetime columns (such as 'e' in this frame).
df.loc[df['d'].notna()]

Unnamed: 0,a,b,c,d,e
1,4,5,6,11.0,14
2,7,8,9,12.0,NaT


Unnamed: 0,heights,std
0,600,164.71187
1,470,164.71187
2,170,164.71187
3,430,164.71187
4,300,164.71187
