<a href="https://colab.research.google.com/github/sunchushiva/python-pandas/blob/master/Pandas_Practice_problems_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd

# Raw observations: the lists are aligned by position, one entry per city.
data = {
    'City': ['Delhi', 'Mumbai', 'Bangalore', 'Kolkata', 'Chennai'],
    'Temperature': [25, 30, 22, 28, 35],
    'AQI': [200, 100, 50, 120, 150],
    'Rainfall': [5, 200, 150, 100, 180]
}

city_labels = data['City']

# `cities` keeps the default 0..n-1 index; the three measurement Series are
# labelled by city name instead.
cities = pd.Series(city_labels, name="City")
temperatures = pd.Series(data['Temperature'], index=city_labels, name="Temperature (°C)")
aqi = pd.Series(data['AQI'], index=city_labels, name="Air Quality Index (AQI)")
rainfall = pd.Series(data['Rainfall'], index=city_labels, name="Rainfall (mm)")

# Show every Series, each followed by a blank separator line.
for series in (cities, temperatures, aqi, rainfall):
    print(series, "\n")


0        Delhi
1       Mumbai
2    Bangalore
3      Kolkata
4      Chennai
Name: City, dtype: object 

Delhi        25
Mumbai       30
Bangalore    22
Kolkata      28
Chennai      35
Name: Temperature (°C), dtype: int64 

Delhi        200
Mumbai       100
Bangalore     50
Kolkata      120
Chennai      150
Name: Air Quality Index (AQI), dtype: int64 

Delhi          5
Mumbai       200
Bangalore    150
Kolkata      100
Chennai      180
Name: Rainfall (mm), dtype: int64 



# Practice Problems - EASY

## Basic Exploration
1. Print the cities Series.
2. Retrieve the first two cities using .head().
3. Get the index and values of the temperatures Series.



## Indexing
1. Retrieve the temperature of "Mumbai" from the temperatures Series.
2. Check if "Delhi" is in the index of the aqi Series.


## Descriptive Statistics
1. Find the average temperature using .mean().
2. Check the maximum value in the rainfall Series.


## Boolean Masking
- Filter cities with AQI above 100.

In [2]:
# Basic Exploration


# Show the complete cities Series.
print(cities, "\n")

# .head(2) keeps only the first two entries.
first_two_cities = cities.head(2)
print(first_two_cities, "\n")

# A Series exposes its labels (.index) and its data (.values) separately.
temperature_labels = temperatures.index
temperature_values = temperatures.values
print(temperature_labels)
print(temperature_values)

0        Delhi
1       Mumbai
2    Bangalore
3      Kolkata
4      Chennai
Name: City, dtype: object 

0     Delhi
1    Mumbai
Name: City, dtype: object 

Index(['Delhi', 'Mumbai', 'Bangalore', 'Kolkata', 'Chennai'], dtype='object')
[25 30 22 28 35]


In [3]:
# Indexing


# Retrieve the temperature of "Mumbai" from the temperatures Series.
# temperatures is labelled by city name, so a label lookup answers this directly.
# Going through the position of "Mumbai" in `cities` only works while both
# Series happen to share the same row order.
mumbai_temperature = temperatures.loc["Mumbai"]
print(mumbai_temperature, "\n")

# Check if "Delhi" is in the index of the aqi Series.
isDelhi = aqi.index
isDelhiOne = "Delhi" in isDelhi
print(isDelhiOne, "\n")
# Negative check: a misspelled label is not found, because membership is an exact match.
isDelhiTwo = "dlehi" in isDelhi
print(isDelhiTwo)


30 

True 

False


In [4]:
# Descriptive Statistics


# Find the average temperature using .mean().
mean_temperature = temperatures.mean()
print(mean_temperature, "\n")

# Check the maximum value in the rainfall Series.
# Series.max() is used directly: it skips missing values, whereas taking .values
# first hands the data to NumPy, whose max() returns NaN as soon as one is present.
max_rainfall = rainfall.max()
print(max_rainfall)

28.0 

200


In [5]:
# Boolean Masking


# Build a True/False mask (one flag per city), then keep only the rows flagged True.
above_threshold = aqi > 100
high_aqi = aqi[above_threshold]
print(high_aqi)

Delhi      200
Kolkata    120
Chennai    150
Name: Air Quality Index (AQI), dtype: int64


# Practice Problems - MEDIUM


## Transformation
- Convert the temperatures Series from Celsius to Fahrenheit using the formula: Fahrenheit = (Celsius * 9/5) + 32

## Sorting
1. Sort the aqi Series by values in descending order.
2. Sort the rainfall Series by its index.

## Missing Data
1. Add a missing value (None) for "Chennai" in the temperatures Series.
2. Fill the missing value with the average temperature.

## Mapping

- Create a new Series where each city's AQI is labeled as "Good", "Moderate", or "Poor":
  - Good: AQI <= 50
  - Moderate: 51 <= AQI <= 150
  - Poor: AQI > 150

In [6]:
# Transformation


# Convert the temperatures Series from Celsius to Fahrenheit

def celToFah(celsius):
  """Convert a temperature from degrees Celsius to degrees Fahrenheit.

  Implements Fahrenheit = Celsius * 1.8 + 32 and returns the converted value.
  """
  scaled = celsius * 1.8
  return scaled + 32

# The result is stored back into `temperatures`, so convert only while the Series
# is still labelled as Celsius. Without this guard, re-running the cell would
# apply the conversion again to values that are already in Fahrenheit.
if temperatures.name == "Temperature (°C)":
  print(temperatures, "\n")
  temperatures = temperatures.apply(celToFah)
  temperatures.name = "Temperature (°F)"
print(temperatures)


Delhi        25
Mumbai       30
Bangalore    22
Kolkata      28
Chennai      35
Name: Temperature (°C), dtype: int64 

Delhi        77.0
Mumbai       86.0
Bangalore    71.6
Kolkata      82.4
Chennai      95.0
Name: Temperature (°F), dtype: float64


In [7]:
# Sorting


# AQI ordered from the highest reading down to the lowest.
# The unsorted Series is printed first for comparison.
sorted_aqi = aqi.sort_values(ascending=False)
print(aqi, "\n")
print(sorted_aqi, "\n \n")

# Rainfall ordered by its city labels, in reverse alphabetical order.
# Again the unsorted Series is printed first.
sorted_rainfall = rainfall.sort_index(ascending=False)
print(rainfall, "\n")
print(sorted_rainfall)

Delhi        200
Mumbai       100
Bangalore     50
Kolkata      120
Chennai      150
Name: Air Quality Index (AQI), dtype: int64 

Delhi        200
Chennai      150
Kolkata      120
Mumbai       100
Bangalore     50
Name: Air Quality Index (AQI), dtype: int64 
 

Delhi          5
Mumbai       200
Bangalore    150
Kolkata      100
Chennai      180
Name: Rainfall (mm), dtype: int64 

Mumbai       200
Kolkata      100
Delhi          5
Chennai      180
Bangalore    150
Name: Rainfall (mm), dtype: int64


In [8]:
# Missing Data


# Blank out the "Chennai" reading; in this float Series the None is shown as NaN.
temperatures["Chennai"] = None
print(temperatures, "\n")

# .mean() leaves the missing entry out, so this is the average of the remaining
# cities; that average then replaces the NaN.
average_temperature = temperatures.mean()
temperatures = temperatures.fillna(value=average_temperature)
print(temperatures)


Delhi        77.0
Mumbai       86.0
Bangalore    71.6
Kolkata      82.4
Chennai       NaN
Name: Temperature (°F), dtype: float64 

Delhi        77.00
Mumbai       86.00
Bangalore    71.60
Kolkata      82.40
Chennai      79.25
Name: Temperature (°F), dtype: float64


In [9]:
# Mapping


# Create a new Series where each city's AQI is labeled as "Good", "Moderate", or "Poor":
# Good: AQI <= 50
# Moderate: 51 <= AQI <= 150
# Poor: AQI > 150


def getPlace(index):
  """Return the entry of the module-level `cities` Series at integer position `index`."""
  place_name = cities.iloc[index]
  return place_name

def aqiLabel(reading):
  """Return the air-quality label for a single AQI reading.

  "Good" up to and including 50, "Moderate" above 50 up to and including 150,
  "Poor" above 150. The bands are contiguous, so a fractional reading such as
  50.5 is "Moderate" instead of falling through to "Poor".
  """
  if reading <= 50:
    return "Good"
  if reading <= 150:
    return "Moderate"
  return "Poor"

# Apply the classifier to the aqi Series itself: the city labels carry over from
# its index, so no position-based pairing with `cities` is needed.
new_series = aqi.apply(aqiLabel).rename("AQI indexed")
print(new_series)

# Sanity check: aqi itself is still a pandas Series (it is not modified above).
print(type(aqi))


Delhi            Poor
Mumbai       Moderate
Bangalore        Good
Kolkata      Moderate
Chennai      Moderate
Name: AQI indexed, dtype: object
<class 'pandas.core.series.Series'>


# Practice Problems - HARD


## Custom Function
- Write a custom function to classify rainfall into categories:
  - Low: Rainfall <= 50
  - Moderate: 51 <= Rainfall <= 150
  - High: Rainfall > 150
- Apply this function to the rainfall Series.

## Aggregation
- Create a Series combining temperature, aqi, and rainfall for each city using a tuple for values (e.g., (25, 200, 5)).

## Time-Series Analysis (Synthetic Data)
- Extend the temperatures Series with 7 days of synthetic time-series data for "Delhi".
- Compute the rolling average temperature over a 3-day window.

## Chaining Operations
- Chain the following operations on the aqi Series:
  - Filter cities with AQI > 100.
  - Sort the filtered values.
  - Replace values above 150 with the string "Critical".


In [10]:
# Custom Function


# Write a custom function to classify rainfall into categories:
# Low: Rainfall <= 50
# Moderate: 51 <= Rainfall <= 150
# High: Rainfall > 150
# Apply this function to the rainfall Series.

def customFun(rainfall):
  """Classify a rainfall amount as "Low", "Moderate" or "High".

  "Low" up to and including 50, "Moderate" above 50 up to and including 150,
  "High" above 150. The bands are contiguous, so a fractional amount between
  50 and 51 is "Moderate" rather than falling through to "High".
  """
  if rainfall <= 50:
    return "Low"
  if rainfall <= 150:
    return "Moderate"
  return "High"

# The labels are stored back into `rainfall`, so classify only while it still
# holds numeric amounts. After the first run it holds text labels, and applying
# customFun to those would raise a TypeError on the numeric comparison.
if pd.api.types.is_numeric_dtype(rainfall):
  rainfall = rainfall.apply(customFun)
  rainfall.name = "Rain level"
print(rainfall)

Delhi             Low
Mumbai           High
Bangalore    Moderate
Kolkata      Moderate
Chennai          High
Name: Rain level, dtype: object


In [11]:
# Aggregation


# Create a Series combining temperature, aqi, and rainfall for each city using a tuple for values (e.g., (25, 200, 5)).

# Look every measurement up by city label rather than by row position, so each
# tuple stays correct even if one of the Series has been re-ordered (e.g. sorted).
# Tuple layout: (temperature, aqi, rainfall).
comb_dict = {
  city: (temperatures.loc[city], aqi.loc[city], rainfall.loc[city])
  for city in cities
}

combination_series = pd.Series(comb_dict, name="Combination of AQI, temperature and rainfall")

print(combination_series)

Delhi             (77.0, 200, Low)
Mumbai           (86.0, 100, High)
Bangalore     (71.6, 50, Moderate)
Kolkata      (82.4, 120, Moderate)
Chennai         (79.25, 150, High)
Name: Combination of AQI, temperature and rainfall, dtype: object


In [12]:
# Time-Series Analysis (Synthetic Data)


# Extend the temperatures Series with a 7-day synthetic time-series data for "Delhi".
# Compute the rolling average temperature over a 3-day window.
# TODO: this exercise has not been solved yet.

In [13]:
# Chaining Operations


# Chain the following operations on the aqi Series:
# Filter cities with AQI > 100.
# Sort the filtered values.
# Replace values above 150 with the string "Critical".

# One method chain, each step feeding the next; the city labels travel with the
# values, so no position-based loop over `cities` is needed.
chained_series = (
  aqi[aqi > 100]                                   # keep readings above 100
  .sort_values(ascending=False)                    # highest reading first
  .map(lambda reading: "Critical" if reading > 150 else reading)
  .astype('object')                                # mixed text/number values: keep a generic dtype
  .rename("Filtered values")
)

print(chained_series)



Delhi      Critical
Chennai         150
Kolkata         120
Name: Filtered values, dtype: object
