# <strong>YZV 202E - OPTIMIZATION FOR DATA SCIENCE PROJECT</strong>
# Importance of Optimization Techniques in Post-Earthquake Relief

## Team: Iron-Flag
## Team Members: Mustafa Bayrak, Zehra Demir

## Data Cleaning 

Importing necessary libraries

In [633]:
import pandas as pd
import numpy as np

### First Dataset

In [634]:
# Load the city table; the preview below shows the columns Id, Name and Affected.
df = pd.read_excel("../Datasets/Sehirler.xlsx")

In [635]:
# Preview the first rows to check that the sheet was parsed as expected.
df.head()

Unnamed: 0,Id,Name,Affected
0,1,Adana,1
1,2,Adıyaman,1
2,3,Afyonkarahisar,0
3,4,Ağrı,0
4,5,Amasya,0


In [636]:
# Count the missing values in each column of the city table.
df.isna().sum()

Id          0
Name        0
Affected    0
dtype: int64

### Second Dataset

In [637]:
# Load the city-to-city distance table. Later cells select a row through its
# "Name" column and select destinations through the city-name columns.
distances = pd.read_excel("../Datasets/ilmesafe.xlsx")

In [638]:
# Replace every missing cell of the distance table with 0, modifying it in place.
# NOTE(review): presumably the blanks are the city-to-itself entries — confirm.
distances.fillna(value=0, inplace=True)

### Third Dataset

In [639]:
# Load the population table; the preview below shows the columns Name and Population.
population = pd.read_excel("../Datasets/population.xlsx")

In [640]:
# Preview the first rows of the population table.
population.head()

Unnamed: 0,Name,Population
0,Adana,2258718
1,Adıyaman,632459
2,Afyonkarahisar,736912
3,Ağrı,535435
4,Aksaray,423011


In [641]:
# Attach each city's population to its Affected flag, matching rows on the city name.
merged_df = pd.merge(df, population, on='Name')

# pd.merge defaults to an inner join, so a city whose name is absent from (or
# spelled differently in) either table is dropped without any error. Report such
# names so the loss is visible instead of silent.
unmatched_names = set(df['Name']) ^ set(population['Name'])
if unmatched_names:
    print(
        "Cities dropped by the merge (no matching name in both tables): "
        f"{sorted(map(str, unmatched_names))}"
    )
merged_df

Unnamed: 0,Id,Name,Affected,Population
0,1,Adana,1,2258718
1,2,Adıyaman,1,632459
2,3,Afyonkarahisar,0,736912
3,4,Ağrı,0,535435
4,5,Amasya,0,335494
...,...,...,...,...
76,77,Yalova,0,276050
77,78,Karabük,0,243614
78,79,Kilis,1,142792
79,80,Osmaniye,1,548556


In [642]:
# Order the cities from most to least populous (in place), then keep the first
# 15 rows as the candidate distribution centers.
merged_df.sort_values(by='Population', ascending=False, inplace=True)
dist_centers = merged_df.iloc[:15]

In [643]:
# Show the candidate distribution centers (the most populous cities).
dist_centers

Unnamed: 0,Id,Name,Affected,Population
33,34,İstanbul,0,15462452
5,6,Ankara,0,5663322
34,35,İzmir,0,4394694
15,16,Bursa,0,3101833
6,7,Antalya,0,2548308
0,1,Adana,1,2258718
41,42,Konya,0,2250020
62,63,Şanlıurfa,1,2115256
26,27,Gaziantep,1,2101157
40,41,Kocaeli,0,1997258


In [644]:
def filter_dataframe(df, x_column, y_column):
    """Return the ``x_column`` values of the rows whose ``y_column`` equals 0.

    Args:
        df: DataFrame to filter.
        x_column: Label of the column whose values are returned.
        y_column: Label of the column that is compared against 0.

    Returns:
        For a single ``x_column`` label, a pandas Series (not a DataFrame) of
        the selected values; the original row index is preserved.
    """
    is_zero = df[y_column] == 0
    return df.loc[is_zero, x_column]

# Narrow the candidates to the cities that were not affected (Affected == 0);
# the result is a one-column frame holding just their names.
dist_centers = filter_dataframe(dist_centers, 'Name', 'Affected').to_frame()

# Join the names back onto merged_df to recover the remaining columns, then
# discard the Id column.
dist_df = dist_centers.merge(merged_df, on='Name').drop(columns=['Id'])

# Flag every remaining row as a distribution center.
dist_df['Distribution'] = 1
dist_df

Unnamed: 0,Name,Affected,Population,Distribution
0,İstanbul,0,15462452,1
1,Ankara,0,5663322,1
2,İzmir,0,4394694,1
3,Bursa,0,3101833,1
4,Antalya,0,2548308,1
5,Konya,0,2250020,1
6,Kocaeli,0,1997258,1
7,Mersin,0,1868757,1
8,Manisa,0,1450616,1
9,Kayseri,0,1421455,1


In [645]:
# Start with no city flagged as a distribution center; the next cell sets the
# flag to 1 for the cities listed in dist_df.
merged_df['Dist'] = 0

In [646]:
# Flag every city that also appears in dist_df as a distribution center.
# A single vectorised membership test replaces the per-city loop, which rebuilt
# a full boolean mask over merged_df on every iteration.
is_dist_center = merged_df['Name'].isin(dist_df['Name'])
merged_df.loc[is_dist_center, 'Dist'] = 1

In [647]:
# Show the table with the new Dist flag.
merged_df

Unnamed: 0,Id,Name,Affected,Population,Dist
33,34,İstanbul,0,15462452,1
5,6,Ankara,0,5663322,1
34,35,İzmir,0,4394694,1
15,16,Bursa,0,3101833,1
6,7,Antalya,0,2548308,1
...,...,...,...,...,...
78,79,Kilis,1,142792,0
28,29,Gümüşhane,0,141702,0
74,75,Ardahan,0,96161,0
61,62,Tunceli,0,83443,0


In [648]:
# Renumber the rows from 0 in their current order and discard the old index.
merged_df.reset_index(drop=True, inplace=True)

In [649]:
# Remove the Id column in place.
# NOTE: running this cell a second time raises KeyError because Id is already gone.
merged_df.drop(columns=["Id"], inplace=True)

In [650]:
# Put the columns in the order used by the final dataset.
final_columns = ["Name", "Affected", "Dist", "Population"]
merged_df = merged_df[final_columns]

### Final dataset to be used in model

In [651]:
# Show the final table: Name, Affected, Dist and Population for every city.
merged_df

Unnamed: 0,Name,Affected,Dist,Population
0,İstanbul,0,1,15462452
1,Ankara,0,1,5663322
2,İzmir,0,1,4394694
3,Bursa,0,1,3101833
4,Antalya,0,1,2548308
...,...,...,...,...
76,Kilis,1,0,142792
77,Gümüşhane,0,0,141702
78,Ardahan,0,0,96161
79,Tunceli,0,0,83443


In [652]:
# Names and populations of the affected cities, in merged_df's row order.
# One .loc selection followed by .copy() yields an independent frame, so the
# Distance column that a later cell adds to it does not write into a slice of
# merged_df (which triggers SettingWithCopyWarning).
affected_cities = merged_df.loc[merged_df["Affected"] == 1, ["Name", "Population"]].copy()

# Build the array from a plain list so that (assuming every name is a str) NumPy
# stores it as a string array rather than an object array.
affected_city_names = np.array(affected_cities["Name"].tolist())

In [653]:
# Write the affected-city names to affected_city_names.npy in the working directory.
np.save('affected_city_names.npy', affected_city_names)

In [654]:
# affected_cities = np.array(merged_df[merged_df["Affected"] == 1]["Name"].str.title().to_list())
# np.save('affected_cities.npy', affected_cities)

In [655]:
# Names of the cities flagged as distribution centers, title-cased, saved to
# dist_cities.npy in the working directory.
# NOTE(review): affected_city_names is built without .str.title(); confirm the
# two name arrays are meant to be cased differently.
is_center = merged_df["Dist"] == 1
dist_cities = np.array(merged_df.loc[is_center, "Name"].str.title().to_list())
np.save('dist_cities.npy', dist_cities)

Distances from Kahramanmaraş to each affected city

In [656]:
# Select the Kahramanmaraş row of the distance table with one boolean mask.
# The earlier row-by-row loop grew the frame with pd.concat on every match and
# turned every column into object dtype; masking keeps the numeric dtypes.
affected_df = distances[distances['Name'] == "Kahramanmaraş"].reset_index(drop=True)

# Keep only the distances to the affected cities. The explicit copy makes
# dist_maras independent of affected_df, so the assignment below is unambiguous.
dist_maras = affected_df[affected_city_names].copy()

# Give Kahramanmaraş a stand-in distance of 10 to itself.
# NOTE(review): presumably this entry is 0 after the earlier fillna, which would
# make the city's demand (population * distance) zero — confirm the choice of 10.
dist_maras.at[0, "Kahramanmaraş"] = 10.0

In [657]:
# Show the affected cities with their populations.
affected_cities

Unnamed: 0,Name,Population
5,Adana,2258718
7,Şanlıurfa,2115256
8,Gaziantep,2101157
11,Diyarbakır,1783431
12,Hatay,1659320
17,Kahramanmaraş,1168163
27,Malatya,806156
32,Adıyaman,632459
36,Elazığ,587960
38,Osmaniye,548556


In [658]:
# Show the Kahramanmaraş row of the distance table (distance to every city).
affected_df

Unnamed: 0,Name,Adana,Adıyaman,Afyonkarahisar,Ağrı,Amasya,Ankara,Antalya,Artvin,Aydın,...,Batman,Şırnak,Bartın,Ardahan,Iğdır,Yalova,Karabük,Kilis,Osmaniye,Düzce
0,Kahramanmaraş,197.0,162.0,776.0,807.0,501.0,572.0,744.0,827.0,1077.0,...,460.0,574.0,856.0,862.0,908.0,978.0,787.0,141.0,105.0,807.0


Create the demands array from each affected city's population and its distance to Kahramanmaraş

In [659]:
# Show the distances from Kahramanmaraş to the affected cities only.
dist_maras

Unnamed: 0,Adana,Şanlıurfa,Gaziantep,Diyarbakır,Hatay,Kahramanmaraş,Malatya,Adıyaman,Elazığ,Osmaniye,Kilis
0,197.0,217.0,78.0,368.0,176.0,10.0,222.0,162.0,319.0,105.0,141.0


In [660]:
# Row 0 of dist_maras holds the distance from Kahramanmaraş to each affected
# city, keyed by city name.
distance_by_city = dist_maras.iloc[0]
distance_values = distance_by_city.values  # same array as before, kept under its original name

# Attach the distances by city name rather than by position, so the column stays
# correct even if the two tables are ever ordered differently. assign() returns a
# new frame, which avoids writing into a filtered slice of merged_df.
affected_cities = affected_cities.assign(
    Distance=affected_cities['Name'].map(distance_by_city)
)

In [661]:
# Show the affected cities with the new Distance column.
affected_cities

Unnamed: 0,Name,Population,Distance
5,Adana,2258718,197.0
7,Şanlıurfa,2115256,217.0
8,Gaziantep,2101157,78.0
11,Diyarbakır,1783431,368.0
12,Hatay,1659320,176.0
17,Kahramanmaraş,1168163,10.0
27,Malatya,806156,222.0
32,Adıyaman,632459,162.0
36,Elazığ,587960,319.0
38,Osmaniye,548556,105.0


In [662]:
# Demand per affected city: population * distance to Kahramanmaraş, scaled by
# (11 / 3.14) and divided by 10**6.
# NOTE(review): the 11/3.14 factor and the 10**6 divisor are unexplained — confirm
# their source. As written, demand grows with distance from Kahramanmaraş;
# confirm that this is the intended direction.
population_times_distance = affected_cities['Population'] * affected_cities['Distance']
scaled_demand = population_times_distance * (11 / 3.14) / 10**6
demands = np.array(scaled_demand)

In [663]:
# Convert each value in the array to an integer; the cast drops the fractional
# part rather than rounding.
demands = demands.astype(int)

In [664]:
# Write the integer demands to demands.npy in the working directory.
np.save('demands.npy', demands)

Create supplies array

In [665]:
# Supply of each distribution center: population * (9 / 4) / 10**4, truncated to an int.
# NOTE(review): the 9/4 and 10**4 scale factors are unexplained — confirm their source.
supplies = [int(pop * (9 / 4) / 10**4) for pop in dist_df["Population"].tolist()]

In [666]:
# Write the supplies (a Python list of ints) to supplies.npy in the working directory.
np.save('supplies.npy', supplies)

Define max helicopter and truck capacities

In [667]:
# Helicopter capacity per distribution center: 2.5/10 (25%) of that center's supply.
helicopter_share = 2.5 / 10
max_helicopter = [helicopter_share * supply for supply in supplies]

In [668]:
# Write the helicopter capacities to max_helicopter.npy in the working directory.
np.save('max_helicopter.npy', max_helicopter)

In [669]:
# Truck capacity per distribution center: 7.5/10 (75%) of that center's supply.
truck_share = 7.5 / 10
max_truck = [truck_share * supply for supply in supplies]

In [670]:
# Write the truck capacities to max_truck.npy in the working directory.
np.save('max_truck.npy', max_truck)