In [2]:
import pandas as pd
import numpy as np

# Read the dataset from 'forestfires.csv' (resolved against the current
# working directory) into the DataFrame used by every step below
df = pd.read_csv("forestfires.csv")

# a. Create data subsets by bucketing each row on its 'area' value
# The threshold is the median over strictly positive areas only, so rows
# with area == 0 do not influence it.
non_zero_areas = df.loc[df['area'] > 0, 'area']
median_non_zero = non_zero_areas.median()

# Label every row. Conditions are checked in order and the first match wins;
# rows matching neither condition fall through to 'MostlyAffected'.
df['area_class'] = np.select(
    [df['area'] == 0, df['area'] <= median_non_zero],
    ['NotAffected', 'PartiallyAffected'],
    default='MostlyAffected',
)

# One subset per label, selected with a boolean mask on 'area_class'
not_affected = df.loc[df['area_class'].eq('NotAffected')]
partially_affected = df.loc[df['area_class'].eq('PartiallyAffected')]
mostly_affected = df.loc[df['area_class'].eq('MostlyAffected')]

# b. Merge two subsets: PartiallyAffected rows stacked above MostlyAffected
#    rows (row-wise concatenation; original index labels are kept)
merged_subsets = pd.concat([partially_affected, mostly_affected])

# c. Sort Data ascending by Temperature, with Wind and then Area as tie-breakers
sorted_df = df.sort_values(['temp', 'wind', 'area'], ascending=True)

# d. Transposing Data: rows become columns and columns become rows
transposed_df = df.T

# e. Melting Data to long format: one row per (month, measurement) pair,
#    with the measurement's column name in 'measurement' and its number in 'value'
melted_df = df.melt(
    id_vars=['month'],
    value_vars=['temp', 'wind', 'area', 'RH', 'rain'],
    var_name='measurement',
    value_name='value',
)

# f. Casting data to wide format: one row per month, one column per
#    measurement, each cell holding the mean of that measurement for the month.
#    reset_index() turns 'month' back into an ordinary column.
wide_df = (
    melted_df
    .pivot_table(values='value', index='month', columns='measurement', aggfunc='mean')
    .reset_index()
)

# Print results to verify
# sep="" stops print() from inserting its default single space between the
# title (which ends in "\n") and the frame. That space would land at the start
# of the frame's first line and push the column header one character to the
# right of the rows beneath it.
print("Not Affected Subset:\n", not_affected.head(), sep="")
print("Partially Affected Subset:\n", partially_affected.head(), sep="")
print("Mostly Affected Subset:\n", mostly_affected.head(), sep="")
print("Merged Subsets (Partially and Mostly Affected):\n", merged_subsets.head(), sep="")
print("Sorted by Temp, Wind, Area:\n", sorted_df.head(), sep="")
print("Transposed DataFrame:\n", transposed_df, sep="")
print("Melted DataFrame:\n", melted_df.head(), sep="")
print("Wide Format DataFrame:\n", wide_df, sep="")

Not Affected Subset:
    X  Y month  day  FFMC   DMC     DC  ISI  temp  RH  wind  rain  area  \
0  7  5   mar  fri  86.2  26.2   94.3  5.1   8.2  51   6.7   0.0   0.0   
1  7  4   oct  tue  90.6  35.4  669.1  6.7  18.0  33   0.9   0.0   0.0   
2  7  4   oct  sat  90.6  43.7  686.9  6.7  14.6  33   1.3   0.0   0.0   
3  8  6   mar  fri  91.7  33.3   77.5  9.0   8.3  97   4.0   0.2   0.0   
4  8  6   mar  sun  89.3  51.3  102.2  9.6  11.4  99   1.8   0.0   0.0   

    area_class  
0  NotAffected  
1  NotAffected  
2  NotAffected  
3  NotAffected  
4  NotAffected  
Partially Affected Subset:
      X  Y month  day  FFMC    DMC     DC   ISI  temp  RH  wind  rain  area  \
138  9  9   jul  tue  85.8   48.3  313.4   3.9  18.0  42   2.7   0.0  0.36   
139  1  4   sep  tue  91.0  129.5  692.6   7.0  21.7  38   2.2   0.0  0.43   
140  2  5   sep  mon  90.9  126.5  686.5   7.0  21.9  39   1.8   0.0  0.47   
141  1  2   aug  wed  95.5   99.9  513.3  13.2  23.3  31   4.5   0.0  0.55   
142  8  6   a