# Pandas

### Import Pandas

In [1]:
import pandas as pd # !pip install pandas
import numpy as np # !pip install numpy

### Series

In [2]:
pd.Series([1, 2, 3, -4, 'amir', np.nan, 0, True])

0       1
1       2
2       3
3      -4
4    amir
5     NaN
6       0
7    True
dtype: object

### Date

In [3]:
# it will print out the 6 days after 2023/01/01
dates = pd.date_range("20230101", periods=6)
dates

DatetimeIndex(['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04',
               '2023-01-05', '2023-01-06'],
              dtype='datetime64[ns]', freq='D')

### Data Frame

In [55]:
# Create a DataFrame with 6 row<index>: dates, 4 columns: A-D
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=["A", "B", "C", "D"])
df

Unnamed: 0,A,B,C,D
2023-01-01,1.679767,-1.984826,-0.000371,0.607297
2023-01-02,-0.792487,-1.225349,-0.214858,0.031325
2023-01-03,-0.340467,0.145488,0.048926,0.675969
2023-01-04,-0.684253,-1.352489,0.100804,2.276624
2023-01-05,-0.452067,-1.591227,-0.156161,0.678262
2023-01-06,-0.132161,1.152605,-0.38833,-0.751013


In [56]:
# Show the Types of DataFrame
print("DataFrame Type")
print(df.dtypes)
print("----------------------")
print("'A' Column Type")
print(df["A"])

DataFrame Type
A    float64
B    float64
C    float64
D    float64
dtype: object
----------------------
'A' Column Type
2023-01-01    1.679767
2023-01-02   -0.792487
2023-01-03   -0.340467
2023-01-04   -0.684253
2023-01-05   -0.452067
2023-01-06   -0.132161
Freq: D, Name: A, dtype: float64


In [57]:
# Show the Head of the DataFrame
df.head(2)

Unnamed: 0,A,B,C,D
2023-01-01,1.679767,-1.984826,-0.000371,0.607297
2023-01-02,-0.792487,-1.225349,-0.214858,0.031325


In [58]:
# Show the end of the DataFrame
df.tail(2)

Unnamed: 0,A,B,C,D
2023-01-05,-0.452067,-1.591227,-0.156161,0.678262
2023-01-06,-0.132161,1.152605,-0.38833,-0.751013


In [59]:
# Explain Whats in the DataFrame
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-0.120278,-0.8093,-0.101665,0.586411
std,0.913105,1.201841,0.185414,0.997049
min,-0.792487,-1.984826,-0.38833,-0.751013
25%,-0.626207,-1.531542,-0.200183,0.175318
50%,-0.396267,-1.288919,-0.078266,0.641633
75%,-0.184238,-0.197221,0.036602,0.677689
max,1.679767,1.152605,0.100804,2.276624


In [60]:
# get the mean of the each column
"""Also you can see only one column:
df[<name of the column>].mean()
OR
df[0:3].mean()
"""
df.mean()

A   -0.120278
B   -0.809300
C   -0.101665
D    0.586411
dtype: float64

In [63]:
# Transpose the element of the DataFrame
df.T

Unnamed: 0,2023-01-01,2023-01-02,2023-01-03,2023-01-04,2023-01-05,2023-01-06
A,1.679767,-0.792487,-0.340467,-0.684253,-0.452067,-0.132161
B,-1.984826,-1.225349,0.145488,-1.352489,-1.591227,1.152605
C,-0.000371,-0.214858,0.048926,0.100804,-0.156161,-0.38833
D,0.607297,0.031325,0.675969,2.276624,0.678262,-0.751013


In [65]:
df.sort_values(by="B")

Unnamed: 0,A,B,C,D
2023-01-01,1.679767,-1.984826,-0.000371,0.607297
2023-01-05,-0.452067,-1.591227,-0.156161,0.678262
2023-01-04,-0.684253,-1.352489,0.100804,2.276624
2023-01-02,-0.792487,-1.225349,-0.214858,0.031325
2023-01-03,-0.340467,0.145488,0.048926,0.675969
2023-01-06,-0.132161,1.152605,-0.38833,-0.751013


In [66]:
# Show the rows that "A" Columns is Bigger Than 0
df[df["A"] > 0]

Unnamed: 0,A,B,C,D
2023-01-01,1.679767,-1.984826,-0.000371,0.607297


### Write Operations

In [79]:
# Export DataFrame To a CSV file
df.to_csv("/tmp/export.csv")
# Export DataFrame To a excel file
df.to_excel("/tmp/export.xlsx") # !pip install openpyxl