# Using GeoNames to get city and country information

- [get-started-geo-names-api](https://www.geonames.org/export/web-services.html)
- [enable-webservice](https://www.geonames.org/enablefreewebservice)
- [geonames-search-api](https://www.geonames.org/export/geonames-search.html)
- [download-geo-names-datasets](https://download.geonames.org/export/dump/)

In [41]:
import requests
import pandas as pd
from zipfile import ZipFile
from io import BytesIO

In [58]:
# Download the GeoNames cities15000.zip dump and load the single table it contains.
# raise_for_status() makes a failed download surface here instead of as an obscure
# ZipFile/parser error further down; the timeout keeps the cell from hanging forever.
geonames_url = "https://download.geonames.org/export/dump/cities15000.zip"
response = requests.get(geonames_url, timeout=60)
response.raise_for_status()
with ZipFile(BytesIO(response.content)) as zip_file:
    file_name = zip_file.namelist()[0]
    with zip_file.open(file_name) as cities_file:
        # keep_default_na=False: by default pandas parses the literal string "NA"
        # as NaN, which wipes out Namibia's ISO country code. na_values=['']
        # keeps empty fields as NaN, so only the "NA"-style strings change.
        cities_data = pd.read_csv(
            cities_file, sep='\t', header=None, names=[
                'geonameid', 'name', 'asciiname', 'alternatenames', 'latitude', 'longitude',
                'feature class', 'feature code', 'country_code', 'cc2', 'admin1 code',
                'admin2 code', 'admin3 code', 'admin4 code', 'population', 'elevation',
                'dem', 'timezone', 'modification date'
            ],
            keep_default_na=False, na_values=[''],
        )

# Load the country information from the GeoNames countryInfo.txt file.
# The header lines start with '#', but '#' also appears inside data fields
# (postal code formats such as "AD###"), so read_csv(comment='#') would cut
# every such row short. Drop only the lines that *start* with '#'.
country_info_url = "https://download.geonames.org/export/dump/countryInfo.txt"
country_info_response = requests.get(country_info_url, timeout=60)
country_info_response.raise_for_status()
country_info_rows = [
    line for line in country_info_response.content.splitlines()
    if not line.startswith(b'#')
]
country_info_data = pd.read_csv(
    BytesIO(b'\n'.join(country_info_rows)), sep='\t', header=None, names=[
        'ISO', 'ISO3', 'ISO-Numeric', 'fips', 'country', 'Capital', 'Area(in sq km)', 'Population',
        'Continent', 'tld', 'CurrencyCode', 'CurrencyName', 'Phone', 'Postal Code Format', 'Postal Code Regex',
        'Languages', 'geonameid', 'neighbours', 'EquivalentFipsCode'
    ],
    # Same "NA" problem as above: ISO "NA" (Namibia) and Continent "NA" must stay strings.
    keep_default_na=False, na_values=[''],
)

# Attach the country name to every city. validate='many_to_one' raises if an
# ISO code is ever duplicated in the country table, instead of silently
# duplicating city rows.
cities_data = cities_data.merge(
    country_info_data[['ISO', 'country']],
    left_on='country_code', right_on='ISO', how='left',
    validate='many_to_one',
)

In [59]:
# Display the merged city/country frame to check the result of the merge.
cities_data

Unnamed: 0,geonameid,name,asciiname,alternatenames,latitude,longitude,feature class,feature code,country_code,cc2,...,admin2 code,admin3 code,admin4 code,population,elevation,dem,timezone,modification date,ISO,country
0,3040051,les Escaldes,les Escaldes,"Ehskal'des-Ehndzhordani,Escaldes,Escaldes-Engo...",42.50729,1.53414,P,PPLA,AD,,...,,,,15853,,1033,Europe/Andorra,2024-06-20,AD,Andorra
1,3041563,Andorra la Vella,Andorra la Vella,"ALV,Ando-la-Vyey,Andora,Andora la Vela,Andora ...",42.50779,1.52109,P,PPLC,AD,,...,,,,20430,,1037,Europe/Andorra,2020-03-03,AD,Andorra
2,290503,Warīsān,Warisan,"Warisan,Warsan,Warīsān,wrsan,ورسان",25.16744,55.40708,P,PPL,AE,,...,,,,108759,,12,Asia/Dubai,2024-06-11,AE,United Arab Emirates
3,290594,Umm Al Quwain City,Umm Al Quwain City,"Oumm al Qaiwain,Oumm al Qaïwaïn,Um al Kawain,U...",25.56473,55.55517,P,PPLA,AE,,...,,,,62747,,2,Asia/Dubai,2019-10-24,AE,United Arab Emirates
4,291074,Ras Al Khaimah City,Ras Al Khaimah City,"Julfa,Khaimah,RAK City,RKT,Ra's al Khaymah,Ra'...",25.78953,55.94320,P,PPLA,AE,,...,,,,351943,,2,Asia/Dubai,2019-09-09,AE,United Arab Emirates
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28435,894701,Bulawayo,Bulawayo,"BUQ,Bulavajas,Bulavajo,Bulavejo,Bulawayo,bu la...",-20.15000,28.58333,P,PPLA,ZW,,...,,,,1200337,,1348,Africa/Harare,2023-02-24,ZW,Zimbabwe
28436,895061,Bindura,Bindura,"Bindura,Bindura Town,Kimberley Reefs,Биндура",-17.30192,31.33056,P,PPLA,ZW,,...,,,,50400,,1118,Africa/Harare,2023-02-24,ZW,Zimbabwe
28437,895269,Beitbridge,Beitbridge,"Bajtbridz,Bajtbridzh,Beitbridge,Beitbridzas,Be...",-22.21667,30.00000,P,PPL,ZW,,...,,,,58100,,461,Africa/Harare,2024-01-18,ZW,Zimbabwe
28438,1085510,Epworth,Epworth,Epworth,-17.89000,31.14750,P,PPLX,ZW,,...,,,,123250,,1508,Africa/Harare,2012-01-19,ZW,Zimbabwe


In [60]:
# Keep only the relevant columns; the ASCII spelling ('asciiname') is exposed as 'name'.
cities_data = (
    cities_data
    .loc[:, ['asciiname', 'alternatenames', 'latitude', 'longitude', 'country_code', 'country']]
    .rename(columns={'asciiname': 'name'})
)

In [61]:
# Persist the trimmed city table to disk, without writing the row index.
cities_data.to_csv(path_or_buf="cities_data.csv", index=False)

In [62]:
# Reload the saved city table. keep_default_na=False stops pandas from parsing
# the literal string "NA" (Namibia's ISO country code) as NaN; na_values=['']
# still treats empty fields as missing.
cities_data = pd.read_csv("cities_data.csv", keep_default_na=False, na_values=[''])

In [None]:
# Example datasets for beach, family, ski, and golf (simplified examples).
# NOTE(review): matching is by name only, so every city sharing a listed name
# is flagged regardless of country — confirm this is acceptable.
beach_cities = ['Los Angeles', 'Miami', 'Barcelona']  # Simplified example
family_friendly_cities = ['Paris', 'New York', 'Tokyo']  # Simplified example
ski_resorts = ['Chamonix', 'Aspen', 'Whistler']  # Simplified example
golf_courses = ['St Andrews', 'Augusta', 'Pebble Beach']  # Simplified example


def _is_listed(city_name, listed_names):
    """Return True if ``city_name`` matches an entry of ``listed_names``.

    The comparison ignores letter case and surrounding whitespace, so
    ``' miami '`` matches ``'Miami'``. Non-string values (for example the NaN
    pandas uses for a missing name) never match and yield False.
    """
    if not isinstance(city_name, str):
        return False
    wanted = city_name.strip().casefold()
    # The list is normalised on every call so later edits to it are honoured.
    return any(wanted == listed.strip().casefold() for listed in listed_names)


# Functions to check for beach, family, ski, and golf
def has_beach(city_name):
    """Return True if ``city_name`` is one of ``beach_cities``."""
    return _is_listed(city_name, beach_cities)


def is_family_friendly(city_name):
    """Return True if ``city_name`` is one of ``family_friendly_cities``."""
    return _is_listed(city_name, family_friendly_cities)


def has_ski_resorts(city_name):
    """Return True if ``city_name`` is one of ``ski_resorts``."""
    return _is_listed(city_name, ski_resorts)


def has_golf_courses(city_name):
    """Return True if ``city_name`` is one of ``golf_courses``."""
    return _is_listed(city_name, golf_courses)

In [None]:
# Apply each amenity check to the city names, storing the result in one column per amenity.
amenity_checks = {
    'beach': has_beach,
    'family': is_family_friendly,
    'ski': has_ski_resorts,
    'golf': has_golf_courses,
}
for column_name, predicate in amenity_checks.items():
    cities_data[column_name] = cities_data['name'].apply(predicate)

In [5]:
# Display the frame after the amenity flag columns have been added.
# NOTE(review): the saved output under this cell looks stale — it shows
# family=False for Paris, New York and Tokyo although those names are in
# family_friendly_cities. Re-run the notebook from a fresh kernel to confirm.
cities_data

Unnamed: 0,name,country,latitude,longitude,beach,family,ski,golf
0,Paris,France,48.85341,2.3488,False,False,False,False
1,New York,United States,40.71427,-74.00597,False,False,False,False
2,Tokyo,Japan,35.6895,139.69171,False,False,False,False
3,London,United Kingdom,51.50853,-0.12574,False,False,False,False
4,Berlin,Germany,52.52437,13.41053,False,False,False,False


In [None]:
# Write the city table to cities_data.csv, without the row index.
cities_data.to_csv(path_or_buf="cities_data.csv", index=False)