# Import Libraries

In [None]:
from datetime import datetime
import pandas as pd 
import numpy as np
from meteostat import Daily, Point, Stations
from geopy.geocoders import Nominatim
from tqdm import tqdm

# Download Data

In [None]:
# Cities downloaded when the caller does not supply its own list.
_DEFAULT_CITIES = (
    "Cairo", "Riyadh", "Phoenix", "Las Vegas", "Lima", "Ulaanbaatar", "Moscow", "Chicago", "Toronto", "Warsaw",
    "Berlin", "London", "Paris", "Sydney", "Buenos Aires", "Tokyo", "Shanghai", "Mumbai", "Bangkok", "Jakarta",
    "Singapore", "Nairobi", "Addis Ababa", "Cape Town", "Athens", "Rome", "Los Angeles", "San Francisco", "Honolulu", "Reykjavik",
    "Nuuk", "Anchorage", "Helsinki", "Oslo", "Stockholm", "Tallinn", "Tehran", "Islamabad", "Kabul", "Baghdad",
    "Santiago", "Quito", "Bogota", "Caracas", "Panama City", "Mexico City", "Guatemala City", "San Jose", "Hanoi", "Seoul",
    "Beijing", "Hong Kong", "Kuala Lumpur", "Manila", "Perth", "Brisbane", "Melbourne", "Wellington", "Auckland", "Vancouver",
    "Montreal", "Halifax", "Winnipeg", "Fairbanks", "Yellowknife", "Barrow (Utqiaġvik)", "Tromsø", "Novosibirsk", "Vladivostok", "Irkutsk",
    "Bishkek", "Astana", "Tashkent", "Yerevan", "Baku", "Amman", "Beirut", "Tunis", "Algiers", "Dakar",
    "Accra", "Lagos", "Johannesburg", "Harare", "Gaborone", "Windhoek", "Lusaka", "New York", "Miami", "Denver",
    "Boston", "Dallas", "Seattle", "Istanbul", "Barcelona", "Kuala Terengganu", "Mombasa", "Port Moresby", "Nouméa", "Honiara",
)


def _fetch_city_weather(city, geocode, start_date, end_date):
    """Return daily weather for one city, or ``None`` if nothing usable exists.

    Args:
        city: Place name handed to the geocoder.
        geocode: Callable with the signature of ``Nominatim.geocode``.
        start_date: First day of the requested range.
        end_date: Last day of the requested range.

    Returns:
        The frame fetched from ``Daily`` with an added ``city`` column, or
        ``None`` when the city cannot be geocoded, no nearby station's daily
        coverage spans the whole range, or every candidate returns no rows.
        A message is printed in each of those cases.
    """
    location = geocode(city, timeout=10)
    if not location:
        print(f"❌ Could not geocode {city}")
        return None

    # Consider the 10 stations returned for this coordinate.
    stations = Stations().nearby(location.latitude, location.longitude).fetch(10)

    # Keep only stations whose advertised daily coverage spans the full range.
    covers_range = (
        (stations["daily_start"] <= pd.to_datetime(start_date))
        & (stations["daily_end"] >= pd.to_datetime(end_date))
    )
    valid_stations = stations[covers_range]
    if valid_stations.empty:
        print(f"⚠️ No valid station with full data range for {city}")
        return None

    # A station's advertised coverage does not guarantee that rows come back,
    # so fall through to the next candidate instead of keeping an empty frame.
    for station_id in valid_stations.index:
        daily_data = Daily(station_id, start=start_date, end=end_date).fetch()
        if not daily_data.empty:
            daily_data["city"] = city
            return daily_data

    print(f"⚠️ Stations near {city} returned no daily data")
    return None


def data_downloader(
    cities=None,
    start_date=datetime(2010, 1, 1),
    end_date=datetime(2024, 12, 31),
):
    """Download daily Meteostat weather data for a list of cities.

    Each city is geocoded with Nominatim, a nearby station covering the whole
    date range is selected, and its daily records are fetched. Cities that
    fail at any step are reported on stdout and skipped, so one bad city never
    aborts the whole run.

    Args:
        cities: Iterable of city names. ``None`` (the default) uses the
            built-in list of 100 cities; an empty iterable downloads nothing.
        start_date: First day of the requested range (default 2010-01-01).
        end_date: Last day of the requested range (default 2024-12-31).

    Returns:
        A single ``DataFrame`` with the records of every successful city
        stacked together, the fetched index moved into a regular column and a
        ``city`` column identifying the source city. An empty ``DataFrame`` is
        returned when no city produced data.
    """
    if cities is None:
        cities = _DEFAULT_CITIES
    cities = list(cities)
    if not cities:
        # Nothing to do: skip creating any network client.
        return pd.DataFrame()

    # Imported here so the notebook's import cell does not need to change.
    from geopy.extra.rate_limiter import RateLimiter

    geolocator = Nominatim(user_agent="AI-Weather-Predictor")
    # Nominatim's usage policy allows at most one request per second.
    # swallow_exceptions=False keeps failures flowing into the handler below.
    geocode = RateLimiter(
        geolocator.geocode,
        min_delay_seconds=1,
        swallow_exceptions=False,
    )

    all_data = []
    for city in tqdm(cities, desc="Downloading Weather Data"):
        try:
            daily_data = _fetch_city_weather(city, geocode, start_date, end_date)
        except Exception as e:
            # Deliberately broad: this is a best-effort batch download and
            # network/service errors for one city must not stop the others.
            print(f"❗ Error for {city}: {e}")
            continue
        if daily_data is not None:
            all_data.append(daily_data)

    if not all_data:
        return pd.DataFrame()
    return pd.concat(all_data).reset_index()


In [None]:
# Run the download with the function's built-in city list and date range.
# This performs network requests per city, and the result is an empty
# DataFrame if no city yielded data.
raw_data = data_downloader()

# Save Data

In [None]:
# Write the downloaded frame to CSV; index=False omits the row index column.
# NOTE(review): the destination is an absolute path inside one user's home
# directory, so this cell presumably fails on any other machine — confirm the
# directory exists or switch to a project-relative path.
raw_data.to_csv('/home/sina.tvk.1997/AI-weather-predictor/data/raw_data.csv',index=False)