In [24]:
import numpy as np
import pandas as pd

# Synthetic weather analysis: one row per location (500), one column per
# day of the year (365). Every module-level name below is kept so that any
# later cell relying on it keeps working.
np.random.seed(0)  # Fixed seed: every run draws the same synthetic data
grid_shape = (500, 365)
temp_data = np.random.uniform(low=-10, high=40, size=grid_shape)  # Celsius temperatures
humid_data = np.random.uniform(low=0, high=100, size=grid_shape)  # Humidity percentages

# Checking for Missing Data
# Blank out 5% of the entries of each array (chosen without replacement,
# so exactly that many distinct cells become NaN) to simulate missing data.
miss_per = 0.05
temp_missing_indices = np.random.choice(
    temp_data.size, size=int(miss_per * temp_data.size), replace=False
)
humidity_missing_indices = np.random.choice(
    humid_data.size, size=int(miss_per * humid_data.size), replace=False
)

# `.flat` indexes the array in place by flattened position.
temp_data.flat[temp_missing_indices] = np.nan
humid_data.flat[humidity_missing_indices] = np.nan

# Counting missing values
temp_missing_count = np.sum(np.isnan(temp_data))
humidity_missing_count = np.sum(np.isnan(humid_data))

print(f"Total missing temperature entries: {temp_missing_count}")
print(f"Total missing humidity entries: {humidity_missing_count}")

# Converting Temperature and Calculating the Discomfort Index
# Celsius -> Fahrenheit
temp_f = 32 + temp_data * 9 / 5

# Discomfort index: plain average of temperature (Celsius) and humidity,
# capped at 80. NaN inputs stay NaN.
# NOTE(review): temp_f is not used below, and with Celsius <= 40 and
# humidity <= 100 the index cannot exceed 70, so the cap never triggers.
# Confirm whether the formula was meant to use temp_f instead.
discomfort_index = (temp_data + humid_data) * 0.5
discomfort_index[discomfort_index > 80] = 80

# Analyzing January Temperatures
# January = the first 31 day-columns; average over all locations and days,
# ignoring NaN. jan_temp is a view, so later in-place edits of temp_data
# show through it.
jan_temp = temp_data[:, 0:31]
jan_avg_temp = np.nanmean(jan_temp)

print(f"Average January temperature across all locations: {jan_avg_temp:.2f} °C")

# Identify Extreme Temperatures
# Readings above 35°C are treated as potential errors and replaced by NaN.
# This is done in place on purpose (see the note on jan_temp above).
too_hot = temp_data > 35
temp_data[too_hot] = np.nan

# NaN count per location (missing values plus the extremes just marked)
null_per_loc = np.sum(np.isnan(temp_data), axis=1)

# Calculate Quarterly Temperature Averages
# Cut the year at day-columns 90, 181 and 273:
#   winter = days 1-90, spring = days 91-181,
#   summer = days 182-273, autumn = days 274-365
winter, spring, summer, autumn = np.split(temp_data, [90, 181, 273], axis=1)

# Per-location mean of each quarter, ignoring NaN
winter_avg, spring_avg, summer_avg, autumn_avg = (
    np.nanmean(quarter, axis=1) for quarter in (winter, spring, summer, autumn)
)

# One row per location, one column per quarter
quarterly_avg_temp = np.column_stack((winter_avg, spring_avg, summer_avg, autumn_avg))

print(f"Quarterly average temperatures (first location): {quarterly_avg_temp[0]}")

# Classify Humidity Levels
# Count, per location, the days that are "Dry" (<30%) and "Humid" (>70%).
# NaN readings compare False on both tests, so they land in neither count.
dry_days = np.sum(humid_data < 30, axis=1)
humid_days = np.sum(humid_data > 70, axis=1)

print(f"Dry days per location (first 5 locations): {dry_days[:5]}")
print(f"Humid days per location (first 5 locations): {humid_days[:5]}")

# Apply Daily Pressure Trend to Temperature Data
# One offset per day, rising linearly from -5 to +5 across the 365 days
pressure_trend = np.linspace(-5, 5, num=365)  # Simulating a mild pressure effect over the year

# Broadcast the per-day offset across every location row
adj_temp_data = temp_data + pressure_trend[np.newaxis, :]

print(f"Adjusted temperatures (first location, first 5 days): {adj_temp_data[0, :5]}")


Total missing temperature entries: 9125
Total missing humidity entries: 9125
Average January temperature across all locations: 15.05 °C
Quarterly average temperatures (first location): [10.82759911 13.90594242 10.02120689 10.56241871]
Dry days per location (first 5 locations): [ 96 112 110 108  95]
Humid days per location (first 5 locations): [113 114 117 105 115]
Adjusted temperatures (first location, first 5 days): [12.4406752  20.78694085 15.19311386         nan  6.29263008]
