# Pandas

In [4]:
import pandas as pd
import numpy as np

In [6]:
pd.Series([1,2,3, 'jadi', 0, np.nan, -1])

0       1
1       2
2       3
3    jadi
4       0
5     NaN
6      -1
dtype: object

In [10]:
dates = pd.date_range("20210101", periods=6)
dates

DatetimeIndex(['2021-01-01', '2021-01-02', '2021-01-03', '2021-01-04',
               '2021-01-05', '2021-01-06'],
              dtype='datetime64[ns]', freq='D')

In [13]:
np.random.randn(6, 4)

array([[ 1.00076577, -1.12191301, -0.07519044,  0.01039303],
       [-0.21432663,  0.59632683, -0.43922072,  0.18174457],
       [ 0.22965407, -0.30082675, -0.31837493,  0.53229639],
       [-1.86420943, -1.60417157,  0.24284198, -1.48418259],
       [-0.71446245, -1.20570903,  0.47219997,  0.34639358],
       [ 0.52063144,  1.03632505,  0.64436489, -0.93535429]])

In [22]:
df = pd.DataFrame(np.random.randn(6, 4),
                  index=dates, 
                  columns=['A', 'B', 'C', 'D'])
df

Unnamed: 0,A,B,C,D
2021-01-01,-0.127621,0.089565,1.953672,0.437813
2021-01-02,-0.420844,-0.245051,0.189711,-0.344652
2021-01-03,0.512969,-0.839389,-0.551133,-1.742437
2021-01-04,-0.385857,-0.581673,-0.579605,0.509468
2021-01-05,2.661093,-0.025486,-0.479381,-0.548437
2021-01-06,0.427096,-0.274514,-0.344881,1.086411


In [20]:
df.dtypes

A    float64
b    float64
c    float64
d    float64
dtype: object

In [21]:
df['A']

2021-01-01    1.571094
2021-01-02    1.247788
2021-01-03   -2.595912
2021-01-04    2.416555
2021-01-05   -1.548033
2021-01-06   -1.284087
Freq: D, Name: A, dtype: float64

In [24]:
df.head(2)

Unnamed: 0,A,B,C,D
2021-01-01,-0.127621,0.089565,1.953672,0.437813
2021-01-02,-0.420844,-0.245051,0.189711,-0.344652


In [25]:
df.tail(2)

Unnamed: 0,A,B,C,D
2021-01-05,2.661093,-0.025486,-0.479381,-0.548437
2021-01-06,0.427096,-0.274514,-0.344881,1.086411


In [28]:
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.444473,-0.312758,0.031397,-0.100305
std,1.156193,0.346436,0.983397,1.002678
min,-0.420844,-0.839389,-0.579605,-1.742437
25%,-0.321298,-0.504883,-0.533195,-0.497491
50%,0.149737,-0.259782,-0.412131,0.046581
75%,0.491501,-0.080378,0.056063,0.491555
max,2.661093,0.089565,1.953672,1.086411


In [30]:
df["A"].mean()

A   -0.274232
B   -0.077743
C    1.071692
D    0.046581
dtype: float64

In [31]:
df.T

Unnamed: 0,2021-01-01,2021-01-02,2021-01-03,2021-01-04,2021-01-05,2021-01-06
A,-0.127621,-0.420844,0.512969,-0.385857,2.661093,0.427096
B,0.089565,-0.245051,-0.839389,-0.581673,-0.025486,-0.274514
C,1.953672,0.189711,-0.551133,-0.579605,-0.479381,-0.344881
D,0.437813,-0.344652,-1.742437,0.509468,-0.548437,1.086411


In [33]:
df.sort_values(by="B")

Unnamed: 0,A,B,C,D
2021-01-03,0.512969,-0.839389,-0.551133,-1.742437
2021-01-04,-0.385857,-0.581673,-0.579605,0.509468
2021-01-06,0.427096,-0.274514,-0.344881,1.086411
2021-01-02,-0.420844,-0.245051,0.189711,-0.344652
2021-01-05,2.661093,-0.025486,-0.479381,-0.548437
2021-01-01,-0.127621,0.089565,1.953672,0.437813


In [34]:
df[df["A"] > 0]

Unnamed: 0,A,B,C,D
2021-01-03,0.512969,-0.839389,-0.551133,-1.742437
2021-01-05,2.661093,-0.025486,-0.479381,-0.548437
2021-01-06,0.427096,-0.274514,-0.344881,1.086411


## read and write oprations

### CSV

In [66]:
df.to_csv("myfile.csv")

In [67]:
pd.read_csv("myfile.csv", index_col=0)

Unnamed: 0,A,B,C,D
2021-01-01,-0.127621,0.089565,1.953672,0.437813
2021-01-02,-0.420844,-0.245051,0.189711,-0.344652
2021-01-03,0.512969,-0.839389,-0.551133,-1.742437
2021-01-04,-0.385857,-0.581673,-0.579605,0.509468
2021-01-05,2.661093,-0.025486,-0.479381,-0.548437
2021-01-06,0.427096,-0.274514,-0.344881,1.086411


### XLSX

In [68]:
df.to_excel("myfile.xlsx")

In [70]:
pd.read_excel("myfile.xlsx", index_col=0)

Unnamed: 0,A,B,C,D
2021-01-01,-0.127621,0.089565,1.953672,0.437813
2021-01-02,-0.420844,-0.245051,0.189711,-0.344652
2021-01-03,0.512969,-0.839389,-0.551133,-1.742437
2021-01-04,-0.385857,-0.581673,-0.579605,0.509468
2021-01-05,2.661093,-0.025486,-0.479381,-0.548437
2021-01-06,0.427096,-0.274514,-0.344881,1.086411
