### Introduction to pandas
What is pandas?
- A Python library for data manipulation, analysis, and cleaning.
- Built on top of NumPy.

**Installing Pandas**

In [None]:
pip install pandas

**Importing Pandas**

In [None]:
import pandas as pd
# importing numpy for other operations
import numpy as np

### Data Structures

In [None]:
# Series: a one-dimensional labeled array.
# No index is given, so the values get the default 0..4 index.
odd_values = [1, 3, 5, 7, 9]
s = pd.Series(odd_values)
print(s)

In [None]:
# DataFrame: a two-dimensional labeled table.
# Each dictionary key becomes a column name; each list holds that column's values.
data = {
    'Name': ['Alice', 'Bob'],
    'Age': [25, 30],
}
df = pd.DataFrame(data)
print(df)

### Data Frames
- From dictionaries
- From lists of lists
- From CSV/Excel/SQL

In [None]:
# Read the CSV file into a DataFrame (the relative path is resolved against the
# current working directory), then show the loaded table.
csv_path = '../data/nepal-covid.csv'
df = pd.read_csv(csv_path)
df

### Basic Data Exploration

In [None]:
# Head & Tail
# A notebook cell only renders its last expression, so each preview is passed to
# display() explicitly; otherwise the head() output would be silently discarded.
display(df.head())   # First 5 rows
display(df.tail())   # Last 5 rows

In [None]:
# Shape: a (number of rows, number of columns) tuple.
# It is an attribute, not a method, so it is read without parentheses.
df.shape     # (rows, columns)

In [None]:
# Info & Describe
# info() prints its summary directly, while describe() returns a DataFrame that
# the notebook renders because it is the last expression in the cell.
df.info()    # Overview: columns, non-null counts, dtypes, memory usage
df.describe() # Summary statistics (count, mean, std, min, quartiles, max)

In [None]:
# Columns and Index
print(df.columns)   # Column labels
print(df.index)     # Row labels


### Selecting Data

In [None]:
# Single Column: indexing with one label returns a Series
df["Province"]

In [None]:
# Multiple Columns: indexing with a list of labels returns a DataFrame
df[['Province', 'District']]

In [None]:
# Selecting a single cell by row and column
# iloc is position-based: row position 2, column position 1.
# loc is label-based: row label 2, column label "District".
# NOTE(review): both lines show the same cell only if "District" is the second
# column and the index is the default 0..n-1 range — confirm against the dataset.
print("Use iloc: ", df.iloc[2, 1])   # By row, column position
print("Use loc: ", df.loc[2, "District"])    # By row, column label

In [None]:
# Conditional Selection (boolean indexing)
# The comparison builds a True/False Series; indexing with it keeps only the True rows.
df[df["Sex"] == "Male"]

### Data Cleaning

In [None]:
# Handling Missing Data
# None of these calls modify df; each one returns a new object. A notebook cell
# only renders its last expression, so every result is passed to display()
# explicitly instead of being discarded.
display(df.isnull())    # Boolean mask: True where a value is missing
display(df.dropna())    # Copy without the rows that contain a missing value
display(df.fillna(0))   # Copy with every missing value replaced by 0

In [None]:
# Treat the '?' placeholder as missing: returns a copy in which cells equal to '?' become NaN.
# The result is not assigned, so df itself is unchanged.
df.replace('?', np.nan)

In [None]:
# Renaming columns
# rename() returns a new DataFrame; rebinding df to it has the same effect as
# inplace=True without relying on in-place mutation.
df = df.rename(columns={'Province': 'State'})
df

In [None]:
# Reverting: restore the original column name so later cells can use 'Province'.
# rename() returns a new DataFrame; rebinding df to it avoids inplace=True.
df = df.rename(columns={'State': 'Province'})
df

### Data Manipulation

In [None]:
# Sorting: returns a copy ordered by the 'Value' column (ascending by default).
# The result is not assigned, so df keeps its original order.
df.sort_values('Value')


In [None]:
# Adding new columns
# Assigning to a column label that does not exist yet creates that column;
# the multiplication is applied element-wise to the whole 'Value' column.
quadrupled = df["Value"] * 4
df["new_column"] = quadrupled
df

**Dropping columns**
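- In pandas, `axis` controls whether an operation is applied row-wise or column-wise.
- `axis=0`: operate along the rows (up ↕ down), i.e. vertically, down the rows. For `drop`, the given labels are row labels.
- `axis=1`: operate along the columns (left ↔ right), i.e. horizontally, across the columns. For `drop`, the given labels are column labels.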

In [None]:
# axis=1 tells drop() that 'new_column' is a column label.
# drop() returns a new DataFrame; the result is not assigned, so df still has the column.
df.drop('new_column', axis=1)

In [None]:
# Dropping rows
# Add a temporary row first so there is a row that is safe to remove afterwards.
print(df.columns)
new_row = {
    "Province": "TEST PROVINCE",
    "District": "TEST DISTRICT",
    "Sex": "Male",
    "Period": "2020-01-04",
    "Age": "14-50",
    "Value": 5,
}
# Wrap the dict in a one-row DataFrame and append it; ignore_index=True renumbers
# the rows so the new row gets the next integer label.
df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)

# tail is a method: without the call parentheses the cell would show the bound
# method object instead of the last rows.
df.tail()

In [None]:
# Remove last Row
# Look the last index label up once and drop the row carrying it. drop() returns a
# new DataFrame, so df is rebound instead of using inplace=True.
# Note: every run of this cell removes one more row from df.
last_idx = df.index[-1]
print("Last row idx", last_idx)
df = df.drop(last_idx)
df

### Grouping and Aggregations


In [None]:
# Groupby: split the rows by 'Province', then compute summary statistics per group
df.groupby("Province").describe() # Other aggregations such as mean() or median() can be used instead

In [None]:
# Aggregation: reduce the whole 'Value' column to a single number
value_col = df['Value']
print("sum:", value_col.sum())
print("mean:", value_col.mean())


### Merging, Joining, and Concatenating

In [None]:
# Concat
# pd.concat stacks DataFrames; it was already used above to append the temporary row to df

In [None]:
# Merging - the pandas counterpart of a SQL JOIN
# The `how` argument selects the join behavior:
# 'inner' (default), 'outer', 'left' or 'right'.

# First DataFrame: id -> name
names = {'id': [1, 2, 3], 'name': ['Alice', 'Bob', 'Charlie']}
df1 = pd.DataFrame(names)

# Second DataFrame: id -> age
ages = {'id': [1, 2, 4], 'age': [25, 30, 40]}
df2 = pd.DataFrame(ages)

# Inner join on 'id': only the ids present in both frames (1 and 2) are kept.
pd.merge(df1, df2, on='id', how='inner')

In [None]:
# Join
# DataFrame.join combines another DataFrame with this one on the index (row labels)
# by default, not on a shared column. Both frames therefore keep their own 'id'
# column, and the suffixes are needed to tell the two apart.
df1 = pd.DataFrame({
    'id': [1, 2, 3],
    'name': ['Alice', 'Bob', 'Charlie']
})

# Second DataFrame
df2 = pd.DataFrame({
    'id': [1, 2, 4],
    'age': [25, 30, 40]
})

# join() returns a new DataFrame and leaves df1 unchanged. Keep the result and make
# it the cell's last expression so that the joined table is what gets displayed.
joined = df1.join(df2, lsuffix='_left', rsuffix='_right')
joined
