In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Read the raw city table from the CSV in the working directory.
data = pd.read_csv('EuCitiesTemperatures.csv')

# Part 1: for each coordinate column, replace missing values with the mean of
# that column over the same country, rounded to 2 decimals.
for coordColumn in ('latitude', 'longitude'):
    perCountry = data.groupby('country')[coordColumn]
    data[coordColumn] = perCountry.transform(
        lambda coords: coords.fillna(round(coords.mean(), 2))
    )

# Part 2: select the cities inside the geographic band (latitude 40 to 60,
# longitude 15 to 30; Series.between includes both endpoints) and report which
# country or countries contribute the most cities to it.
geoBand = data[(data['latitude'].between(40, 60)) & (data['longitude'].between(15, 30))]
countryCounts = geoBand['country'].value_counts()
maxCount = countryCounts.max()
# Comparing against the maximum keeps every country tied for first place.
topCountries = countryCounts[countryCounts == maxCount]
# Print the part 2 results.
# The ranges in the heading use en dashes; keep this file saved as UTF-8 so
# they are not garbled.
print("Countries with the most cities in the geographic band (lat 40–60, long 15–30):")
print(topCountries)
print("\nList of cities in the geographic band:")
print(geoBand[['city', 'country', 'latitude', 'longitude']])

# Part 3: build a region-type label for every city by concatenating the 'EU'
# value with the capitalized 'coastline' value (e.g. 'yesNo' if both columns
# hold 'yes'/'no' strings -- confirm against the CSV).
capitalizedCoastline = data['coastline'].apply(lambda flag: flag.capitalize())
data['regionType'] = data['EU'] + capitalizedCoastline
# Mean temperature of each region type, indexed by the region-type label.
regionTempAvg = data['temperature'].groupby(data['regionType']).mean()
def fillTemperature(row):
    """Return the temperature for one row, filling it in when it is missing.

    A present temperature is returned unchanged. A missing one is replaced by
    the average stored in the module-level ``regionTempAvg`` lookup under the
    row's 'regionType', rounded to 2 decimals.
    """
    recorded = row['temperature']
    if not pd.isnull(recorded):
        return recorded
    return round(regionTempAvg[row['regionType']], 2)
# Run fillTemperature on every row (axis=1) and store the result back in the
# temperature column.
filledTemperatures = data.apply(fillTemperature, axis=1)
data['temperature'] = filledTemperatures

# Write the cleaned table to a separate file; the input CSV is not overwritten.
# index=False leaves the DataFrame index out of the output.
updatedCsvPath = 'EuCitiesTemperatures_updated.csv'
data.to_csv(updatedCsvPath, index=False)

# Plot 1: bar chart of how many cities fall into each region type.
regionCounts = data['regionType'].value_counts()
barFig, barAx = plt.subplots(figsize=(8, 6))
regionCounts.plot(kind='bar', ax=barAx)
barAx.set_title('Number of Cities by Region Type')
barAx.set_xlabel('Region Type (EUCoastline)')
barAx.set_ylabel('Number of Cities')
# Tilt the category labels so they stay readable.
plt.xticks(rotation=45)
# Horizontal grid lines only.
barAx.grid(axis='y')
plt.tight_layout()
plt.show()

# Plot 2: scatter of city positions (longitude on x, latitude on y), drawn one
# country at a time so each country gets its own colour and legend entry.
plt.figure(figsize=(10, 8))
countries = data['country'].unique()
for country in countries:
    countrySubset = data[data['country'] == country]
    plt.scatter(countrySubset['longitude'], countrySubset['latitude'], label=country, s=25)
plt.title('City Distribution by Latitude and Longitude')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.grid(True)
# The per-country labels only become visible once a legend is drawn. Anchor it
# just outside the right edge of the axes, in two columns, so that a long
# country list does not cover the points.
plt.legend(title='Country', fontsize='small', ncol=2,
           loc='upper left', bbox_to_anchor=(1.02, 1))
plt.tight_layout()
plt.show()

# Plot 3: histogram of the number of countries per population group.
# Keep one row per distinct (country, population) pair so a country's
# population is not counted once for each of its cities.
countryPopulation = data[['country', 'population']].drop_duplicates()
histFig, histAx = plt.subplots(figsize=(8, 6))
histAx.hist(countryPopulation['population'], bins=5, edgecolor='black')
histAx.set_title('Number of Countries by Population Group')
histAx.set_xlabel('Population (in millions)')
histAx.set_ylabel('Number of Countries')
# Horizontal grid lines only.
histAx.grid(axis='y')
histFig.tight_layout()
plt.show()

#plot 4, subplots with color coded temps
def tempColor(temp):
    """Map a temperature to the colour name used for plotting.

    Returns 'red' when temp is above 10, 'blue' when it is below 6, and
    'orange' for everything else (6 to 10 inclusive, and any value for which
    both comparisons are false, such as NaN).
    """
    if temp > 10:
        return 'red'
    if temp < 6:
        return 'blue'
    return 'orange'

# Tag every city with the colour that tempColor assigns to its temperature.
data['tempColor'] = data['temperature'].apply(tempColor)

regionTypes = data['regionType'].unique()

# Size the subplot grid from the number of region types instead of assuming
# exactly four: two panels per row, as many rows as needed (at least one).
# With four region types this is the same 2x2, 14x10 inch figure.
nCols = 2
nRows = max(1, (len(regionTypes) + nCols - 1) // nCols)
# squeeze=False keeps `axes` a 2-D array even when the grid has a single row,
# so flatten() always works.
fig, axes = plt.subplots(nRows, nCols, figsize=(14, 5 * nRows), squeeze=False)
axes = axes.flatten()

# One panel per region type: latitude against the city's position within the
# region subset, coloured by temperature band.
for ax, region in zip(axes, regionTypes):
    regionSubset = data[data['regionType'] == region].reset_index(drop=True)
    xVals = range(len(regionSubset))
    colors = regionSubset['tempColor']

    ax.scatter(xVals, regionSubset['latitude'], c=colors)
    ax.set_title(f'Region: {region}')
    ax.set_xlabel('City Index')
    ax.set_ylabel('Latitude')
    # One tick per city.
    ax.set_xticks(xVals)
    ax.grid(True)

# Hide any panel left over when the region count does not fill the grid.
for unusedAx in axes[len(regionTypes):]:
    unusedAx.set_visible(False)

plt.tight_layout()
plt.show()
