# GeoNames to get City and Country information

In [None]:
!pip3 install numpy
!pip3 install pandas
!pip3 install openpyxl
!pip3 install requests
!pip3 install geopandas

- [get-started-geo-names-api](https://www.geonames.org/export/web-services.html)
- [enable-webservice](https://www.geonames.org/enablefreewebservice)
- [geonames-search-api](https://www.geonames.org/export/geonames-search.html)
- [download-geo-names-datasets](https://download.geonames.org/export/dump/)

In [2]:
import requests
import pandas as pd
from zipfile import ZipFile
from io import BytesIO

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [58]:
# Download the GeoNames cities dump (cities15000.zip) and load the first member
# of the archive, a tab-separated file without a header row.
geonames_url = "https://download.geonames.org/export/dump/cities15000.zip"
# The timeout keeps a stalled connection from hanging the kernel; raise_for_status
# stops an HTTP error page from being handed to ZipFile as if it were an archive.
response = requests.get(geonames_url, timeout=60)
response.raise_for_status()
with ZipFile(BytesIO(response.content)) as zip_file:
    file_name = zip_file.namelist()[0]
    with zip_file.open(file_name) as cities_file:
        # keep_default_na=False: pandas' default NA markers include the string "NA",
        # which is also Namibia's ISO country code. Only empty fields are missing.
        cities_data = pd.read_csv(cities_file, sep='\t', header=None, names=[
            'geonameid', 'name', 'asciiname', 'alternatenames', 'latitude', 'longitude',
            'feature class', 'feature code', 'country_code', 'cc2', 'admin1 code',
            'admin2 code', 'admin3 code', 'admin4 code', 'population', 'elevation',
            'dem', 'timezone', 'modification date'
        ], keep_default_na=False, na_values=[''])

# Load the country table from the GeoNames countryInfo.txt file.
country_info_url = "https://download.geonames.org/export/dump/countryInfo.txt"
country_info_response = requests.get(country_info_url, timeout=60)
country_info_response.raise_for_status()
# Drop the '#' header lines by hand rather than with read_csv(comment='#'):
# the 'Postal Code Format' values use '#' as a digit placeholder, so the comment
# option would cut every such row short and blank all of its later columns.
country_info_rows = [
    line for line in country_info_response.content.splitlines()
    if line.strip() and not line.startswith(b'#')
]
country_info_data = pd.read_csv(BytesIO(b'\n'.join(country_info_rows)), sep='\t', header=None, names=[
    'ISO', 'ISO3', 'ISO-Numeric', 'fips', 'country', 'Capital', 'Area(in sq km)', 'Population',
    'Continent', 'tld', 'CurrencyCode', 'CurrencyName', 'Phone', 'Postal Code Format', 'Postal Code Regex',
    'Languages', 'geonameid', 'neighbours', 'EquivalentFipsCode'
], keep_default_na=False, na_values=[''])  # keeps ISO "NA" and continent "NA" as text

# Attach the country name to every city. The left join keeps cities whose code has
# no match; validate= fails loudly if an ISO code is ever duplicated in the country
# table, which would silently multiply city rows.
cities_data = cities_data.merge(
    country_info_data[['ISO', 'country']],
    left_on='country_code', right_on='ISO', how='left', validate='many_to_one',
)

In [59]:
# Preview the merged city/country table (the rendered output is truncated to the
# first and last rows and elides the middle columns).
cities_data

Unnamed: 0,geonameid,name,asciiname,alternatenames,latitude,longitude,feature class,feature code,country_code,cc2,...,admin2 code,admin3 code,admin4 code,population,elevation,dem,timezone,modification date,ISO,country
0,3040051,les Escaldes,les Escaldes,"Ehskal'des-Ehndzhordani,Escaldes,Escaldes-Engo...",42.50729,1.53414,P,PPLA,AD,,...,,,,15853,,1033,Europe/Andorra,2024-06-20,AD,Andorra
1,3041563,Andorra la Vella,Andorra la Vella,"ALV,Ando-la-Vyey,Andora,Andora la Vela,Andora ...",42.50779,1.52109,P,PPLC,AD,,...,,,,20430,,1037,Europe/Andorra,2020-03-03,AD,Andorra
2,290503,Warīsān,Warisan,"Warisan,Warsan,Warīsān,wrsan,ورسان",25.16744,55.40708,P,PPL,AE,,...,,,,108759,,12,Asia/Dubai,2024-06-11,AE,United Arab Emirates
3,290594,Umm Al Quwain City,Umm Al Quwain City,"Oumm al Qaiwain,Oumm al Qaïwaïn,Um al Kawain,U...",25.56473,55.55517,P,PPLA,AE,,...,,,,62747,,2,Asia/Dubai,2019-10-24,AE,United Arab Emirates
4,291074,Ras Al Khaimah City,Ras Al Khaimah City,"Julfa,Khaimah,RAK City,RKT,Ra's al Khaymah,Ra'...",25.78953,55.94320,P,PPLA,AE,,...,,,,351943,,2,Asia/Dubai,2019-09-09,AE,United Arab Emirates
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28435,894701,Bulawayo,Bulawayo,"BUQ,Bulavajas,Bulavajo,Bulavejo,Bulawayo,bu la...",-20.15000,28.58333,P,PPLA,ZW,,...,,,,1200337,,1348,Africa/Harare,2023-02-24,ZW,Zimbabwe
28436,895061,Bindura,Bindura,"Bindura,Bindura Town,Kimberley Reefs,Биндура",-17.30192,31.33056,P,PPLA,ZW,,...,,,,50400,,1118,Africa/Harare,2023-02-24,ZW,Zimbabwe
28437,895269,Beitbridge,Beitbridge,"Bajtbridz,Bajtbridzh,Beitbridge,Beitbridzas,Be...",-22.21667,30.00000,P,PPL,ZW,,...,,,,58100,,461,Africa/Harare,2024-01-18,ZW,Zimbabwe
28438,1085510,Epworth,Epworth,Epworth,-17.89000,31.14750,P,PPLX,ZW,,...,,,,123250,,1508,Africa/Harare,2012-01-19,ZW,Zimbabwe


In [60]:
# Keep only the relevant columns and expose the ASCII spelling under the name 'name'.
kept_columns = ['asciiname', 'alternatenames', 'latitude', 'longitude', 'country_code', 'country']
cities_data = cities_data.loc[:, kept_columns].rename(columns={'asciiname': 'name'})

In [61]:
# Write the trimmed city table to disk, leaving out the positional row index.
cities_data.to_csv(path_or_buf="cities_data.csv", index=False)

In [5]:
# Reload the exported city table. pandas' default NA markers include the string
# "NA", which is also Namibia's ISO country code, so restrict missing-value
# detection to empty fields to keep that code as text.
cities_data = pd.read_csv("cities_data.csv", keep_default_na=False, na_values=[''])

# Construire des datasets pour avoir les informations sur les villes

In [28]:
def _city_in_lookup(city_name, country_code, lookup_df):
    """Return True when ``lookup_df`` has a row for this city in this country.

    Args:
        city_name: City name to look for; compared case-insensitively.
        country_code: Country code that must match the row exactly.
        lookup_df: DataFrame with at least 'name' and 'country_code' columns.

    Returns:
        bool: True if at least one row matches both the name and the country code.
    """
    # A missing name arrives as NaN (a float) after a CSV round-trip; it has no
    # .lower() and cannot match any city, so answer False instead of raising.
    if not isinstance(city_name, str):
        return False
    name_matches = lookup_df['name'].str.lower() == city_name.lower()
    country_matches = lookup_df['country_code'] == country_code
    return bool((name_matches & country_matches).any())


def has_beach(city_name, country_code, beach_cities_df):
    """Return True if the city is listed in ``beach_cities_df`` for that country."""
    return _city_in_lookup(city_name, country_code, beach_cities_df)


def is_family_friendly(city_name, country_code, family_friendly_cities_df):
    """Return True if the city is listed in ``family_friendly_cities_df`` for that country."""
    return _city_in_lookup(city_name, country_code, family_friendly_cities_df)


def has_ski_resort(city_name, country_code, ski_resort_cities_df):
    """Return True if the city is listed in ``ski_resort_cities_df`` for that country."""
    return _city_in_lookup(city_name, country_code, ski_resort_cities_df)


def has_golf_course(city_name, country_code, golf_course_cities_df):
    """Return True if the city is listed in ``golf_course_cities_df`` for that country."""
    return _city_in_lookup(city_name, country_code, golf_course_cities_df)

In [23]:
# Load the beach-city lookup and flag every city that appears in it.
# keep_default_na=False keeps the ISO code "NA" (Namibia) as text; pandas would
# otherwise parse it as NaN and no city in that country could ever match.
beach_cities_df = pd.read_csv("beach_cities.csv", keep_default_na=False, na_values=[''])
cities_data['beach'] = cities_data.apply(lambda row: has_beach(row['name'], row['country_code'], beach_cities_df), axis=1)

In [24]:
# Load the family-friendly lookup and flag every city that appears in it.
# keep_default_na=False keeps the ISO code "NA" (Namibia) as text; pandas would
# otherwise parse it as NaN and no city in that country could ever match.
family_friendly_cities_df = pd.read_csv("family_friendly_cities.csv", keep_default_na=False, na_values=[''])
cities_data['family'] = cities_data.apply(lambda row: is_family_friendly(row['name'], row['country_code'], family_friendly_cities_df), axis=1)

In [25]:
# Load the ski-resort lookup and flag every city that appears in it.
# keep_default_na=False keeps the ISO code "NA" (Namibia) as text; pandas would
# otherwise parse it as NaN and no city in that country could ever match.
ski_resort_cities_df = pd.read_csv("ski_resort_cities.csv", keep_default_na=False, na_values=[''])
# The lookup passed here must be the ski-resort table: passing the family-friendly
# table would make the 'ski' column a copy of 'family'.
cities_data['ski'] = cities_data.apply(lambda row: has_ski_resort(row['name'], row['country_code'], ski_resort_cities_df), axis=1)

In [29]:
# Load the golf-course lookup and flag every city that appears in it.
# keep_default_na=False keeps the ISO code "NA" (Namibia) as text; pandas would
# otherwise parse it as NaN and no city in that country could ever match.
golf_course_cities_df = pd.read_csv("golf_course_cities.csv", keep_default_na=False, na_values=[''])
cities_data['golf'] = cities_data.apply(lambda row: has_golf_course(row['name'], row['country_code'], golf_course_cities_df), axis=1)

In [31]:
# Preview the city table with the boolean beach / family / ski / golf columns.
cities_data

Unnamed: 0,name,alternatenames,latitude,longitude,country_code,country,beach,family,ski,golf
0,les Escaldes,"Ehskal'des-Ehndzhordani,Escaldes,Escaldes-Engo...",42.50729,1.53414,AD,Andorra,False,False,False,False
1,Andorra la Vella,"ALV,Ando-la-Vyey,Andora,Andora la Vela,Andora ...",42.50779,1.52109,AD,Andorra,False,False,False,False
2,Warisan,"Warisan,Warsan,Warīsān,wrsan,ورسان",25.16744,55.40708,AE,United Arab Emirates,False,False,False,False
3,Umm Al Quwain City,"Oumm al Qaiwain,Oumm al Qaïwaïn,Um al Kawain,U...",25.56473,55.55517,AE,United Arab Emirates,True,False,False,False
4,Ras Al Khaimah City,"Julfa,Khaimah,RAK City,RKT,Ra's al Khaymah,Ra'...",25.78953,55.94320,AE,United Arab Emirates,True,False,False,False
...,...,...,...,...,...,...,...,...,...,...
28435,Bulawayo,"BUQ,Bulavajas,Bulavajo,Bulavejo,Bulawayo,bu la...",-20.15000,28.58333,ZW,Zimbabwe,False,False,False,False
28436,Bindura,"Bindura,Bindura Town,Kimberley Reefs,Биндура",-17.30192,31.33056,ZW,Zimbabwe,False,False,False,False
28437,Beitbridge,"Bajtbridz,Bajtbridzh,Beitbridge,Beitbridzas,Be...",-22.21667,30.00000,ZW,Zimbabwe,False,False,False,False
28438,Epworth,Epworth,-17.89000,31.14750,ZW,Zimbabwe,False,False,False,False


In [32]:
# Persist the table together with its beach / family / ski / golf columns,
# without the positional row index.
output_csv = "cities_data.csv"
cities_data.to_csv(output_csv, index=False)