# Challenge question on 2nd 

1. Filter a DataFrame to only show values greater than the mean
2. Combine filters to find rows matching multiple Boolean conditions
3. Use isin() to filter categorical data like product types
4. Slice DataFrame with .loc[] using a Boolean filter
5. Chain complex filters with multiple Boolean operators

In [1]:
import pandas as pd
import ipynb.fs.full.Create_data_helper as cdh

# Create dataframe for storing weather data and load the data in dataframe
cdh.create_random_weather_data("weather_data.csv")
df = pd.read_csv("weather_data.csv")
print(df.head())


                         Date  Temperature  Humidity Weather Condition
0  2024-07-26 08:48:41.534691         24.5      70.3            cloudy
1  2024-07-25 08:48:41.534691         22.0      86.2             sunny
2  2024-07-24 08:48:41.534691         24.9      13.3             rainy
3  2024-07-23 08:48:41.534691         21.2      42.7             rainy
4  2024-07-22 08:48:41.534691         25.0      28.1             sunny


In [8]:
# 1. Filter a DataFrame to only show values greater than mean
print(f"Mean of the temperature is: {df['Temperature'].mean()}")
greater_mean_temp = df[df["Temperature"] > df["Temperature"].mean()]
print(greater_mean_temp)

Mean of the temperature is: 24.740000000000002
                          Date  Temperature  Humidity Weather Condition
3   2024-07-23 07:27:12.657350         29.0       3.2            cloudy
6   2024-07-20 07:27:12.657350         30.0      47.4            cloudy
7   2024-07-19 07:27:12.657350         26.6      89.4             rainy
8   2024-07-18 07:27:12.657350         25.1       8.1            cloudy
10  2024-07-16 07:27:12.657350         26.9      74.9            cloudy
14  2024-07-12 07:27:12.657350         29.7      47.7            cloudy
15  2024-07-11 07:27:12.657350         27.7      31.5            cloudy
16  2024-07-10 07:27:12.657350         26.8      87.0            cloudy
19  2024-07-07 07:27:12.657350         28.7      93.1             sunny
20  2024-07-06 07:27:12.657350         27.7      69.6             sunny
21  2024-07-05 07:27:12.657350         26.4       3.9             sunny
22  2024-07-04 07:27:12.657350         28.1      25.6             sunny
26  2024-06-30 07

In [11]:
# Combine filters to find rows matching multiple Boolean conditions
print(f"Mean of temperature: {df['Temperature'].mean()}")
print(f"Mean of humidity: {df['Humidity'].mean()}")

# Just create a condition for temperature and humidity
multi_filter_condition = (df["Temperature"] > df["Temperature"].mean()) & (df["Humidity"] > df["Humidity"].mean())
# Filter data based on existing condition and newly added condition.
multi_filter_data = df[multi_filter_condition & (df["Weather Condition"] == "sunny")]

print(multi_filter_data)

Mean of temperature: 24.740000000000002
Mean of humidity: 51.70666666666667
                          Date  Temperature  Humidity Weather Condition
19  2024-07-07 07:27:12.657350         28.7      93.1             sunny
20  2024-07-06 07:27:12.657350         27.7      69.6             sunny


In [13]:
# 3. Use isin() to filter categorical data like product types

# Filter weather data where the weather condition is either sunny or cloudy
weather_data = df[df["Weather Condition"].isin(["sunny", "cloudy"])].sort_values(by="Weather Condition")
print(weather_data)

                          Date  Temperature  Humidity Weather Condition
0   2024-07-26 07:27:12.657350         23.9      88.3            cloudy
25  2024-07-01 07:27:12.657350         23.0      93.6            cloudy
23  2024-07-03 07:27:12.657350         22.9      48.4            cloudy
18  2024-07-08 07:27:12.657350         24.0      29.8            cloudy
16  2024-07-10 07:27:12.657350         26.8      87.0            cloudy
15  2024-07-11 07:27:12.657350         27.7      31.5            cloudy
27  2024-06-29 07:27:12.657350         22.8      79.0            cloudy
10  2024-07-16 07:27:12.657350         26.9      74.9            cloudy
14  2024-07-12 07:27:12.657350         29.7      47.7            cloudy
8   2024-07-18 07:27:12.657350         25.1       8.1            cloudy
6   2024-07-20 07:27:12.657350         30.0      47.4            cloudy
4   2024-07-22 07:27:12.657350         22.3      38.9            cloudy
3   2024-07-23 07:27:12.657350         29.0       3.2           

In [14]:
# 4. Slice DataFrame with .loc[] using a Boolean filter
weather_data = df.loc[df["Weather Condition"] == "sunny"]
print(weather_data)

                          Date  Temperature  Humidity Weather Condition
1   2024-07-25 07:27:12.657350         21.3      48.0             sunny
5   2024-07-21 07:27:12.657350         24.2      27.3             sunny
9   2024-07-17 07:27:12.657350         21.7      75.6             sunny
12  2024-07-14 07:27:12.657350         21.1      37.5             sunny
13  2024-07-13 07:27:12.657350         21.8      18.8             sunny
19  2024-07-07 07:27:12.657350         28.7      93.1             sunny
20  2024-07-06 07:27:12.657350         27.7      69.6             sunny
21  2024-07-05 07:27:12.657350         26.4       3.9             sunny
22  2024-07-04 07:27:12.657350         28.1      25.6             sunny
24  2024-07-02 07:27:12.657350         22.7      96.0             sunny
28  2024-06-28 07:27:12.657350         25.1      14.8             sunny


In [23]:
# 5. Chain complex filters with multiple Boolean operators

cond1 = (df["Temperature"] > df["Temperature"].mean())
cond2 = (df["Weather Condition"] == "sunny")

weather_data = df.loc[cond1 & cond2]
print(weather_data)

                          Date  Temperature  Humidity Weather Condition
19  2024-07-07 07:27:12.657350         28.7      93.1             sunny
20  2024-07-06 07:27:12.657350         27.7      69.6             sunny
21  2024-07-05 07:27:12.657350         26.4       3.9             sunny
22  2024-07-04 07:27:12.657350         28.1      25.6             sunny
28  2024-06-28 07:27:12.657350         25.1      14.8             sunny
