# Pandas Demo

In [None]:
import pandas as pd

## Create a simple Series

In [None]:
# Build a Series from a plain list; pandas supplies the default 0..n-1 index.
values = [10, 20, 30, 40]
s = pd.Series(values)
print(s)


## Create a DataFrame

In [None]:
# Each key becomes a column label; each list supplies that column's values.
names = ['Alice', 'Bob', 'Charlie']
ages = [25, 30, 35]
cities = ['London', 'Paris', 'Berlin']
data = {'Name': names, 'Age': ages, 'City': cities}

df = pd.DataFrame(data)
print(df)


## Load a CSV file into a dataframe

In [None]:
# Read the CSV (path is relative to the working directory) into df.
csv_path = 'data.csv'
df = pd.read_csv(csv_path)

# Preview the top of the table; head() shows the first five rows by default.
df.head()

## Display the dataframe info

In [None]:
df = pd.read_csv('data.csv')

# Uncomment any of the lines below to try the other inspection helpers.
#print(f"head():\n {df.head()}\n")           # First 5 rows
#print(f"tail():\n {df.tail()}\n")           # Last 5 rows
#print(f"shape:\n {df.shape}\n")             # (rows, columns)
#print(f"columns:\n {df.columns}\n")         # Column names
#print(f"describe():\n {df.describe()}\n")   # Statistics summary

# info() writes its summary directly to stdout and returns None, so it must
# not be wrapped in print() -- that would emit a stray "None" line afterwards.
df.info()                                   # Summary info


## Display a single column

In [None]:
# Bracket-indexing with a single label selects that one column.
sales_column = df['Sales']
sales_column

## Display multiple columns

In [None]:
# Indexing with a list of labels returns a DataFrame holding just those columns.
columns_to_show = ['Sales', 'Profit']
df[columns_to_show]

## Display rows by label

In [None]:
# .loc selects by label: first the row labels, then the column labels.
# Passing lists for both keeps the result a DataFrame.
row_labels = [4, 5]
column_labels = ['Product_Name']
row = df.loc[row_labels, column_labels]
row

## Display multiple rows by position

In [None]:
# .iloc selects by integer position; this slice takes the first three rows.
first_three = slice(None, 3)
subset = df.iloc[first_three]
subset

## Filter dataframe

In [None]:
# Build a boolean mask first, then keep only the rows where it is True.
sales_above_3000 = df['Sales'] > 3000
high_sales = df[sales_above_3000]
high_sales

## Filter dataframe with multiple conditions

In [None]:
# Combine two boolean masks element-wise with &; a row must satisfy both.
sales_above_3000 = df['Sales'] > 3000
profit_above_400 = df['Profit'] > 400
high_sales_profit = df[sales_above_3000 & profit_above_400]
high_sales_profit

## Add a column to the dataset

In [None]:
# Derive Cost as the element-wise difference Sales - Profit.
df['Cost'] = df['Sales'].sub(df['Profit'])
df.head()

## Manipulate existing data

In [None]:
# Halve every Sales value and write the result back over the column.
halved_sales = df['Sales'].mul(0.5)
df['Sales'] = halved_sales
df

## Remove a column from the dataset

In [None]:
# drop() returns a new DataFrame without the listed columns; df is not modified.
columns_to_remove = ['Cost']
df_without_cost = df.drop(columns=columns_to_remove)
df_without_cost

## Rename a column

In [None]:
# Map the old column label to the new one; rename() returns a new DataFrame.
sales_rename = {'Sales': 'Total_Sales'}
df_renamed = df.rename(columns=sales_rename)
df_renamed

## Rename multiple columns

In [None]:
# Map each existing column label to its replacement. The new labels use the
# same Capitalised_Words style throughout ('Total_Sales', 'Net_Profit').
# rename() returns a new DataFrame and leaves df unchanged.
column_renames = {'Sales': 'Total_Sales', 'Profit': 'Net_Profit'}
df_renamed_multiple = df.rename(columns=column_renames)
df_renamed_multiple

## Use built-in Pandas functions

In [None]:
# Total Sales per Region: group the rows, pick the column, then sum each group.
sales_by_region = df.groupby('Region')['Sales']
df_region_sales = sales_by_region.sum()
df_region_sales

## Aggregate data

In [None]:
# Apply several aggregations to Sales per Region in one call; the result has
# one column per function name ('sum', 'mean', 'count').
aggregate_sales = df.groupby('Region')['Sales'].agg(['sum', 'mean', 'count'])
aggregate_sales

## Check for missing data

In [None]:
# Count missing values per column (isna() is the same check as isnull()).
df.isna().sum()

## Replace missing data

In [None]:
# Fill gaps by column type: numeric columns get their mean, object (text)
# columns get their most frequent value.
numeric_cols = df.select_dtypes('number').columns
object_cols = df.select_dtypes('object').columns

# Nothing to assign when the frame has no numeric columns.
if len(numeric_cols) > 0:
    df[numeric_cols] = df[numeric_cols].fillna(df[numeric_cols].mean())

for col in object_cols:
    modes = df[col].mode()
    # mode() ignores missing values, so it is empty when the whole column is
    # missing. Indexing it would raise IndexError; leave such columns as-is.
    if not modes.empty:
        df[col] = df[col].fillna(modes.iloc[0])

df

## Re-check for missing data

In [None]:
# Recount missing values per column to confirm the fill step worked.
df.isna().sum()

## Export dataset to a csv file

In [None]:
# Write df to a CSV file; index=False keeps the row index out of the output.
output_path = 'clean_data.csv'
df.to_csv(output_path, index=False)