In [23]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import datetime
from dateutil.relativedelta import relativedelta

In [2]:
df = pd.read_csv("owid-covid-data.csv")
vaccinepercapita = pd.read_csv("daily-covid-vaccination-doses-per-capita.csv")

In [3]:
def load_data(url,table_no,header):
     html = pd.read_html(url, header = header)
     df_return = html[table_no]
     return df_return

In [4]:
df_cities = load_data('https://tr.wikipedia.org/wiki/T%C3%BCrkiye%27nin_illeri',3,0)
df_cities.tail()

Unnamed: 0,Ad,Alan (km²)[8],Nüfus (2019)[7],NY kişi/km²,Plaka kodu,Telefon kodu,Vali
76,Uşak,5.555,370.509,6669.0,64,276,Funda Kocabıyık
77,Van,20.921,1.136.757,5433.0,65,432,Mehmet Emin Bilmez
78,Yalova,798.0,270.976,33956.0,77,226,Muammer Erol
79,Yozgat,13.69,421.200,3076.0,66,354,Ziya Polat
80,Zonguldak,3.342,596.053,17835.0,67,372,Mustafa Tutulmaz


In [5]:
# Getting city names of Turkey from Wikipedia

df_cities = df_cities[['Ad','Plaka kodu']]
df_cities.rename(columns={'Ad':'city_name','Plaka kodu':'plate_no'},inplace=True)

In [6]:
# Getting latitude and longitude with city names

from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="http")
df_cities["Latitude"] = 0.0
df_cities["Longitude"] = 0.0
for i in df_cities.index:
    location = geolocator.geocode(df_cities['city_name'][i])
    try:
        df_cities["Latitude"][i] = location.latitude
    except:
        df_cities["Latitude"][i] = None
    try:
        df_cities["Longitude"][i] = location.longitude
    except:
        df_cities["Longitude"][i] = None

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cities["Latitude"][i] = location.latitude
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cities["Longitude"][i] = location.longitude


In [7]:
# Cechking Nulls for latitude and longitude

df_cities.Latitude.isnull().sum()

0

In [8]:
# One city has conflicted coordinates, so we fix it manually

df_cities['Latitude'][77]=38.5012
df_cities['Longitude'][77]=43.3730
df_cities.tail()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cities['Latitude'][77]=38.5012
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cities['Longitude'][77]=43.3730


Unnamed: 0,city_name,plate_no,Latitude,Longitude
76,Uşak,64,38.609666,29.330651
77,Van,65,38.5012,43.373
78,Yalova,77,40.592707,29.077136
79,Yozgat,66,39.820557,34.809492
80,Zonguldak,67,41.250324,31.838974


In [9]:
df_risk = load_data('https://www.haberturk.com/il-il-koronavirus-risk-haritasi-16-mart-dusuk-orta-yuksek-ve-cok-yuksek-riskli-illerin-listesi-gunce-3006818',0,None)
df_risk.head()

Unnamed: 0,İl Adı,Sayı
0,Adana,6300
1,Adıyaman,15463
2,Afyon,4750
3,Ağrı,2222
4,Aksaray,17352


In [10]:
# Loading links from "link.txt" file.

links = open("links.txt","r")
brokenlinks = open("b_links.txt","r")

# Splitting links line by line.

link_array = links.read().splitlines()

In [11]:
# A function for inserting values from one DataFrame to another by matching values.

def insert_by_match(df1,df2,col_name1,col_name2,col_name_to_insert,value_col_name):
    for i in df1.index:
        for j in df2.index:
            if df1[col_name1][i] == df2[col_name2][j]:
                df1[col_name_to_insert][i] = df2[value_col_name][j]
        

In [52]:
# A function for inserting values from one DataFrame to another by index.

def insert_by_index(df1,df2,col_name_to_insert,value_col_name,date,week_no):
    for i in df1.index:
        df1[col_name_to_insert][i] = df2[value_col_name][i]/100
        df1["week"][i] = date
        df1["week_no"] = week_no


In [53]:
# Scraping the weekly number of cases per/100k for each city. 

count = 0
df_cities_new = pd.DataFrame(columns=['city_name','plate_no','Latitude','Longitude','value'])
for link,i in zip(link_array,range(len(link_array))):
    count = count + 1
    
    df_cities_copy = df_cities
    df_cities_copy["value"] = 0
    df_cities_copy["week"] = 0
    
    week = count + 11
    year = 2021
    date = datetime.date(year,1,1)+relativedelta(weeks=+week)
    
    try:
        df_city_values = load_data(link,0,None)
        insert_by_index(df_cities_copy,df_city_values,"value","Sayı",date,count)
        df_cities_new = pd.concat([df_cities_new,df_cities_copy])
        #print("burdayım"+str(i))
    except:
        html = pd.read_html(link, header = None)
        df_return = html[0]
        insert_by_index(df_cities_copy,df_return,"value",1,date,count)
        df_cities_new = pd.concat([df_cities_new,df_cities_copy])
        



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/i

In [54]:
df_cities_new["value"]= df_cities_new["value"].astype(float)

In [55]:
pd.to_datetime(df_cities_new['week'],errors='ignore') 

0    2021-03-26
1    2021-03-26
2    2021-03-26
3    2021-03-26
4    2021-03-26
        ...    
76   2021-05-28
77   2021-05-28
78   2021-05-28
79   2021-05-28
80   2021-05-28
Name: week, Length: 810, dtype: datetime64[ns]

In [56]:
df_cities_new.to_csv("df_cities_risk_weekly.csv",index=False)