# Week 1: Basic Pandas

## Creating a DataFrame from Lists

In [None]:
import pandas as pd
import numpy as np

In [None]:
# ----- Part 1: plain Python lists holding the raw values

names = ['Alice', 'Bob', 'Charlie', 'David']
ages = [25, 30, 35, 40]

# ----- Part 2: build the DataFrame; each keyword becomes a column label
#       and the list passed to it becomes that column's values

df = pd.DataFrame(dict(Name=names, Age=ages))

# A bare expression on the last line of a cell is rendered as the cell output.
df

Unnamed: 0,Name,Age
0,Alice,25
1,Bob,30
2,Charlie,35
3,David,40


## Indexing and Selection

In [None]:
# Selecting a single column with [] returns just that column
# (the printed result is labelled "Name: Name").
# Note: this rebinds `names`, which held the plain Python list in the first cell.
names = df['Name']
print(names)

0      Alice
1        Bob
2    Charlie
3      David
Name: Name, dtype: object


In [None]:
# .iloc selects by integer position (0-based), so position 1 is the second row.
second_row = df.iloc[1]  # iloc = "integer location"
print(second_row)

Name    Bob
Age      30
Name: 1, dtype: object


In [None]:
# Boolean-mask selection: keep only the rows where Age is greater than 30.
df.loc[df['Age'] > 30]

Unnamed: 0,Name,Age
2,Charlie,35
3,David,40


## Creating a New Column (Feature, Variable) and Modifying a DataFrame

In [None]:
# Add a boolean column that is True for every row whose Age is above 30.
df['IsSenior'] = df['Age'].gt(30)
df

Unnamed: 0,Name,Age,IsSenior
0,Alice,25,False
1,Bob,30,False
2,Charlie,35,True
3,David,40,True


In [None]:
# Drop the 'IsSenior' column. drop() returns a new frame by default, so `df`
# itself keeps the column (passing inplace=True would modify `df` instead).
df_remove_issenior = df.drop(columns='IsSenior')

In [None]:
# The new frame has only the 'Name' and 'Age' columns.
df_remove_issenior

Unnamed: 0,Name,Age
0,Alice,25
1,Bob,30
2,Charlie,35
3,David,40


In [None]:
# The original frame still has 'IsSenior': drop() was not applied in place.
df

Unnamed: 0,Name,Age,IsSenior
0,Alice,25,False
1,Bob,30,False
2,Charlie,35,True
3,David,40,True


In [None]:
# Renaming a column (variable)
#
# rename() returns a new frame; rebinding `df` to the result avoids
# inplace=True. The cell stays safe to re-run because labels that are not
# present in the frame are ignored by default.
df = df.rename(columns={'Age': 'AgeYears'})

In [None]:
# The 'Age' column now appears under the label 'AgeYears'.
df

Unnamed: 0,Name,AgeYears,IsSenior
0,Alice,25,False
1,Bob,30,False
2,Charlie,35,True
3,David,40,True


## Sorting and Aggregation

In [None]:
# Order the rows from the highest AgeYears to the lowest; the result is a new frame.
sorted_df = df.sort_values('AgeYears', ascending=False)

In [None]:
# Rows keep their original index labels after sorting (3, 2, 1, 0 here).
sorted_df

Unnamed: 0,Name,AgeYears,IsSenior
3,David,40,True
2,Charlie,35,True
1,Bob,30,False
0,Alice,25,False


In [None]:
# Aggregate the whole 'AgeYears' column into a single number: its mean.
mean_age = df['AgeYears'].mean()

In [None]:
# Show the mean computed in the previous cell.
mean_age

32.5

## Counting Values

In [None]:
# Count how many times each distinct value occurs in 'AgeYears'.
df['AgeYears'].value_counts()

25    1
30    1
35    1
40    1
Name: AgeYears, dtype: int64

## Filtering with Multiple Conditions

In [None]:
# Combine two conditions with & (element-wise AND):
# keep rows whose AgeYears is strictly between 25 and 40.
filter_df = df[df['AgeYears'].gt(25) & df['AgeYears'].lt(40)]

In [None]:
# Double brackets pass a list of column labels, here a single one.
filter_df[['Name']]

Unnamed: 0,Name
1,Bob
2,Charlie


In [None]:
# Select several columns at once by listing their labels.
filter_df[['Name','IsSenior']]

Unnamed: 0,Name,IsSenior
1,Bob,False
2,Charlie,True


## Finding Maximum, Minimum, and Mean Values:

In [None]:
# Largest value in the 'AgeYears' column.
df['AgeYears'].max()

40

In [None]:
# Smallest value in the 'AgeYears' column.
df['AgeYears'].min()

25

In [None]:
# Mean of the 'AgeYears' column, shown alongside the max and min.
df['AgeYears'].mean()

32.5

## Applying Functions to a DataFrame:

In [None]:
# Current state of the frame before a derived column is added.
df

Unnamed: 0,Name,AgeYears,IsSenior
0,Alice,25,False
1,Bob,30,False
2,Charlie,35,True
3,David,40,True


In [None]:
# Step 1: define an ordinary Python function (plain Python skill)

def double_age(age):
    """Return the given age multiplied by two."""
    doubled = age * 2
    return doubled

# Step 2: apply the 'double_age' function to the 'AgeYears' column.
# .apply calls the function once per value and collects the results,
# which are stored in the new 'DoubleAge' column.

df['DoubleAge'] = df['AgeYears'].apply(double_age)

In [None]:
# The frame now includes the 'DoubleAge' column.
df

Unnamed: 0,Name,AgeYears,IsSenior,DoubleAge
0,Alice,25,False,50
1,Bob,30,False,60
2,Charlie,35,True,70
3,David,40,True,80


## Checking for Null Values

In [None]:
# isnull() flags every missing cell; .any() then reports, per column,
# whether at least one cell in that column is missing.
df.isnull().any()

Name         False
AgeYears     False
IsSenior     False
DoubleAge    False
dtype: bool

## Unique Values:

In [None]:
# Distinct values found in the 'Name' column, returned as an array.
df['Name'].unique()

array(['Alice', 'Bob', 'Charlie', 'David'], dtype=object)

## Reading Data from CSV

In [None]:
# read_csv accepts a URL as well as a local path; this one points at the
# Titanic dataset hosted in the course's GitHub repository (needs network access).
file_path = 'https://raw.githubusercontent.com/kaopanboonyuen/SC310005_ArtificialIntelligence_2023s1/main/dataset/titanic_dataset.csv'

# Note: this rebinds `df`, so the small Name/Age frame built earlier
# is no longer reachable under that name.
df = pd.read_csv(file_path)

In [None]:
# Preview the first rows (head() shows five by default) instead of the whole frame.
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [None]:
# shape is a (number of rows, number of columns) tuple.
df.shape

(891, 12)