# EU City Temperatures

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Preprocessing/analysis

### Load the CSV file

In [None]:
# Read the city temperature dataset into a DataFrame.
csv_path = "EuCitiesTemperatures.csv"
df = pd.read_csv(csv_path)
# Bare expression so the notebook renders the loaded table.
df

### Fill in the missing latitude and longitude values

In [None]:
# Replace each missing coordinate with the mean of that coordinate over the
# rows of the same country, then round the whole column to two decimals.
for coord in ('latitude', 'longitude'):
    by_country = df.groupby('country')[coord]
    filled = by_country.transform(lambda values: values.fillna(values.mean()))
    df[coord] = filled.round(2)
# Bare expression so the notebook renders the updated table.
df

### Find the subset of cities in latitudes 40 - 60 and longitudes 15 - 30

In [None]:
# Keep the cities whose latitude lies in [40, 60] and whose longitude lies in
# [15, 30]; both ranges include their endpoints.
in_lat_band = df['latitude'].between(40, 60)
in_lon_band = df['longitude'].between(15, 30)
subset = df[in_lat_band & in_lon_band]

subset

### Find the countries with a maximal number of such cities

In [None]:
# Recompute the latitude 40-60 / longitude 15-30 subset (endpoints included).
lat_ok = (df['latitude'] >= 40) & (df['latitude'] <= 60)
lon_ok = (df['longitude'] >= 15) & (df['longitude'] <= 30)
subset = df[lat_ok & lon_ok]

# Count the subset's cities per country and keep every country that is tied
# for the highest count.
cities_per_country = subset['country'].value_counts()
top_count = cities_per_country.max()
max_countries = cities_per_country[cities_per_country == top_count].index.tolist()
max_countries

### Fill in the missing temperature values

In [None]:
# Fill each missing temperature with the mean temperature of the rows that
# share the same (EU, coastline) combination.
region_temperatures = df.groupby(['EU', 'coastline'])['temperature']
df['temperature'] = region_temperatures.transform(
    lambda temps: temps.fillna(temps.mean())
)
df.head(10)

## Visualization

### Bar chart of number of cities in each region

In [None]:
# Number of rows (cities) in every (EU, coastline) combination.
region_counts = df.groupby(['EU', 'coastline']).size()

# Draw one bar per region.
region_counts.plot.bar()

# Axis labels first, then the title.
plt.xlabel('Region')
plt.ylabel('Number of Cities')
plt.title('Number of Cities in Each Region')

# Put one tick under each bar, labelled with its (EU, coastline) index entry.
tick_positions = range(len(region_counts))
plt.xticks(tick_positions, region_counts.index)

plt.show()


### Scatter plot of cities' latitude/longitude

In [None]:
# Plot every city at (longitude, latitude), one marker series per country so
# that each country gets its own legend entry.
plt.figure(figsize=(10, 5))
for country, cities in df.groupby('country'):
    plt.plot(
        cities['longitude'],
        cities['latitude'],
        marker='o',
        linestyle='',  # markers only, no connecting line
        label=country,
    )
plt.title("Cities' Latitude vs Longitude")
plt.xlabel('Longitude')
plt.ylabel('Latitude')
# Legend sits above the axes, laid out in up to ten columns.
plt.legend(loc="upper center", bbox_to_anchor=(0.5, 1.40), ncol=10)
plt.show()

### Histogram of country population

In [None]:
# Histogram of country population, counting each country exactly once.
#
# The population is taken as the first non-null value per country rather than
# the unique values of the whole column, so two countries that share the same
# population figure are not merged into a single observation. Countries with
# no population value at all are left out instead of being cast to int.
country_population = df.groupby('country')['population'].first().dropna()

# NOTE(review): astype(int) truncates any fractional part -- confirm that the
# population column is meant to be binned as whole numbers.
population = country_population.to_numpy().astype(int)

plt.figure(figsize=(8, 8))
plt.hist(population, bins=5, edgecolor='black')

# Ticks every fifth of the maximum; the step is clamped to 1 because range()
# rejects a zero step when the maximum is small.
max_population = int(population.max())
tick_step = max(1, int(round(max_population / 5)))
plt.xticks(range(0, max_population + 1, tick_step))

plt.ylabel('Number of Countries', fontsize=14)
plt.xlabel('Population Groups', fontsize=14)
plt.title('Population values unique to each country.', fontsize=14)
plt.show()

### Scatter plots of city vs latitude for each region

In [None]:
# Scatter plots of city number vs latitude, one panel per (EU, coastline)
# region, with each point colored by the city's temperature.
#
# Rows are selected per region straight from the DataFrame, so the latitudes
# and the colors always have the same length. Collecting latitudes in a dict
# keyed by city name would drop a point whenever a city name repeats within a
# region while its color is still recorded, and scatter() rejects that
# mismatch.


def _temperature_color(temperature):
    """Return the marker color for one temperature value.

    Above 10 is 'red', below 6 is 'blue', and everything else (including a
    missing value, for which both comparisons are false) is 'orange'.
    """
    if temperature > 10:
        return 'red'
    if temperature < 6:
        return 'blue'
    return 'orange'


# (EU, coastline) values in panel order: row by row over the 2x2 grid.
regions = [('yes', 'yes'), ('yes', 'no'), ('no', 'yes'), ('no', 'no')]

fig, ax = plt.subplots(2, 2, figsize=(25, 10))
for panel, (eu, coastline) in zip(ax.flat, regions):
    cities = df[(df['EU'] == eu) & (df['coastline'] == coastline)]
    # Cities are numbered 0..n-1 in the order they appear in the DataFrame.
    city_numbers = np.arange(len(cities))
    colors = [_temperature_color(t) for t in cities['temperature']]

    panel.scatter(city_numbers, cities['latitude'], c=colors)
    panel.set_title('EU {} Coastline {}'.format(eu, coastline))
    panel.set_xlabel('City Number')
    panel.set_ylabel('Latitude')
    # One tick per city so every point lines up with its number.
    panel.set_xticks(city_numbers)
plt.show()
