In [2]:
import requests
import pandas as pd
from datetime import datetime

API_KEY = "eee0f55dccceb67630a015d0577f12cb"
CITY = "Indore"
URL = f"https://api.openweathermap.org/data/2.5/weather?q={CITY}&appid={API_KEY}&units=metric"

response = requests.get(URL)
data = response.json()

print("API Response:", data)   # Debug line

if "main" in data and "wind" in data:
    weather = {
        "City": CITY,
        "Temperature (°C)": data["main"]["temp"],
        "Humidity (%)": data["main"]["humidity"],
        "Wind Speed (m/s)": data["wind"]["speed"],
        "Wind Direction (°)": data["wind"].get("deg", None),
        "Timestamp": datetime.now()
    }

    df = pd.DataFrame([weather])
    print(df)

else:
    print("❌ Error:", data.get("message", "Unknown error"))



API Response: {'coord': {'lon': 75.8333, 'lat': 22.7179}, 'weather': [{'id': 721, 'main': 'Haze', 'description': 'haze', 'icon': '50d'}], 'base': 'stations', 'main': {'temp': 21.1, 'feels_like': 20.73, 'temp_min': 21.1, 'temp_max': 21.1, 'pressure': 1017, 'humidity': 56, 'sea_level': 1017, 'grnd_level': 953}, 'visibility': 2500, 'wind': {'speed': 1.54, 'deg': 110}, 'clouds': {'all': 39}, 'dt': 1764561705, 'sys': {'type': 1, 'id': 9067, 'country': 'IN', 'sunrise': 1764552028, 'sunset': 1764591073}, 'timezone': 19800, 'id': 1269743, 'name': 'Indore', 'cod': 200}
     City  Temperature (°C)  Humidity (%)  Wind Speed (m/s)  \
0  Indore              21.1            56              1.54   

   Wind Direction (°)                  Timestamp  
0                 110 2025-12-01 09:33:35.598986  


In [14]:
import gzip
import json
import pandas as pd

CITY_LIST_FILE = "D:\\codes\\brain tumor project yz\\infosys\\city.list.json.gz"

# Load file
with gzip.open(CITY_LIST_FILE, "rt", encoding="utf-8") as f:
    data = json.load(f)

# Convert to DataFrame
df = pd.DataFrame(data)

# Filter population ≥100,000
df_major = df[df["population"] >= 100000]

# Sort by population (descending)
df_major = df_major.sort_values(by="population", ascending=False)

# Select top 1200 cities
df_major_1200 = df_major.head(1200)

# Save only ID column (for API use)
df_major_1200.to_csv("major_city_ids.csv", index=False)

print("Saved major_city_ids.csv")
print(df_major_1200.head())


KeyError: 'population'

In [None]:
import requests
import pandas as pd
from datetime import datetime
import time

API_KEY = "eee0f55dccceb67630a015d0577f12cb"

# Load list of 1200 city IDs
all_city_ids = [...]   # paste your 1200 IDs here

weather_data = []

for idx, city_id in enumerate(all_city_ids):
    
    url = f"http://api.openweathermap.org/data/2.5/weather?id={city_id}&appid={API_KEY}&units=metric"
    response = requests.get(url).json()

    if "main" in response:
        weather = {
            "City": response["name"],
            "Temperature (°C)": response["main"]["temp"],
            "Humidity (%)": response["main"]["humidity"],
            "Wind Speed (m/s)": response["wind"]["speed"],
            "Wind Direction (°)": response["wind"].get("deg", None),
            "Timestamp": datetime.now()
        }
        weather_data.append(weather)
        print(f"Fetched: {response['name']}  ({idx+1}/{len(all_city_ids)})")
    else:
        print("Error:", response)

    time.sleep(1)   # Required (avoid rate limit)

df = pd.DataFrame(weather_data)
print(df)


In [None]:

# Corrected + improved version with visualization support (world map using plotly)

import requests
import gzip
import json
import pandas as pd
import time
import random
from datetime import datetime
from itertools import islice
import plotly.express as px

# -------- CONFIG --------
API_KEY = "495ba73cba95e25b95cfaed153ef1c2d"   
CITY_LIST_URL = "http://bulk.openweathermap.org/sample/city.list.json.gz"
OUTPUT_CSV = "global_weather_data.csv"
NUM_CITIES = 1200  ##60 calls/min × 20 IDs/call
BATCH_SIZE = 20
REQUEST_DELAY = 1.0
TIMEOUT = 15
# ------------------------

def chunks(iterable, size):
    it = iter(iterable)
    while True:
        chunk = list(islice(it, size))
        if not chunk:
            break
        yield chunk

def download_city_list(url=CITY_LIST_URL):
    print("Downloading city list...")
    r = requests.get(url, stream=True, timeout=TIMEOUT)
    r.raise_for_status()
    with gzip.GzipFile(fileobj=r.raw) as f:
        data = json.load(f)
    print(f"Downloaded {len(data)} cities.")
    return data

def sample_city_ids(city_list, n=NUM_CITIES, seed=42):
    ids = [c["id"] for c in city_list if "id" in c]
    n = min(n, len(ids))
    random.Random(seed).shuffle(ids)
    sampled = ids[:n]
    print(f"Sampled {len(sampled)} city IDs.")
    return sampled

def fetch_batch(city_ids):
    ids = ",".join(map(str, city_ids))
    url = f"https://api.openweathermap.org/data/2.5/group?id={ids}&appid={API_KEY}"
    return requests.get(url).json()

def parse_weather_item(item):
    return {
        "city_id": item.get("id"),
        "city": item.get("name"),
        "country": item.get("sys", {}).get("country"),
        "lat": item.get("coord", {}).get("lat"),
        "lon": item.get("coord", {}).get("lon"),
        "temperature_C": item.get("main", {}).get("temp"),
        "humidity_pct": item.get("main", {}).get("humidity"),
        "pressure_hPa": item.get("main", {}).get("pressure"),
        "wind_speed_m_s": item.get("wind", {}).get("speed"),
        "wind_deg": item.get("wind", {}).get("deg"),
        "weather_main": item.get("weather", [{}])[0].get("main"),
        "weather_description": item.get("weather", [{}])[0].get("description"),
        "timestamp_utc": datetime.utcfromtimestamp(item.get("dt")) if item.get("dt") else None,
        "fetched_at": datetime.utcnow()
    }

def visualize_world_map(df):
    fig = px.scatter_geo(
        df,
        lat="lat",
        lon="lon",
        color="temperature_C",
        hover_name="city",
        hover_data=["country", "humidity_pct", "wind_speed_m_s", "weather_main"],
        projection="natural earth",
        title="Global Weather Visualization (Temperature Colored)"
    )
    fig.show()

def main():
    city_list = download_city_list()
    sampled_ids = sample_city_ids(city_list, NUM_CITIES)

    all_records = []
    total_batches = (len(sampled_ids) + BATCH_SIZE - 1) // BATCH_SIZE

    print(f"Fetching weather in {total_batches} batches...")

    print("Fetching weather batches...")

    for i in range(0, len(sampled_ids), 20):
        batch = sampled_ids[i:i+20]
        print(f"Fetching batch {i//20 + 1} → {len(batch)} cities")

    try:
        data = fetch_batch(batch, API_KEY)
        for item in data.get("list", []):
            all_records.append(parse_weather_item(item))

    except Exception as e:
        print(f"Error fetching batch {i//20 + 1}: {e}")
    
    time.sleep(1)   # rate limit protection


    if all_records:
        df = pd.DataFrame(all_records)
        df.to_csv(OUTPUT_CSV, index=False)
        print(f"Saved {len(df)} records → {OUTPUT_CSV}")
        visualize_world_map(df)
    else:
        print("No weather data fetched. Check API key and limits.")

if __name__ == "__main__":
    main()


Downloading city list...
Downloaded 209579 cities.
Sampled 1200 city IDs.
Fetching weather in 60 batches...
Fetching weather batches...
Fetching batch 1 → 20 cities
Fetching batch 2 → 20 cities
Fetching batch 3 → 20 cities
Fetching batch 4 → 20 cities
Fetching batch 5 → 20 cities
Fetching batch 6 → 20 cities
Fetching batch 7 → 20 cities
Fetching batch 8 → 20 cities
Fetching batch 9 → 20 cities
Fetching batch 10 → 20 cities
Fetching batch 11 → 20 cities
Fetching batch 12 → 20 cities
Fetching batch 13 → 20 cities
Fetching batch 14 → 20 cities
Fetching batch 15 → 20 cities
Fetching batch 16 → 20 cities
Fetching batch 17 → 20 cities
Fetching batch 18 → 20 cities
Fetching batch 19 → 20 cities
Fetching batch 20 → 20 cities
Fetching batch 21 → 20 cities
Fetching batch 22 → 20 cities
Fetching batch 23 → 20 cities
Fetching batch 24 → 20 cities
Fetching batch 25 → 20 cities
Fetching batch 26 → 20 cities
Fetching batch 27 → 20 cities
Fetching batch 28 → 20 cities
Fetching batch 29 → 20 cities
Fet

In [8]:
import requests

url = "https://api.openweathermap.org/data/2.5/group?id=1275841&appid=eee0f55dccceb67630a015d0577f12cb"
print(requests.get(url).json())


{'cod': 401, 'message': 'Invalid API key. Please see https://openweathermap.org/faq#error401 for more info.'}


In [5]:
import json
import gzip
import pandas as pd

# Load the major cities list
with gzip.open("D:\\codes\\brain tumor project yz\\infosys\\city.list.json.gz", "rt", encoding="utf-8") as f:
    cities = json.load(f)

df_cities = pd.DataFrame(cities)
print(df_cities.head())
print("Total cities:", len(df_cities))


       id           name state country                                 coord
0   833.0  Ḩeşār-e Sefīd            IR  {'lon': 47.159401, 'lat': 34.330502}
1  2960.0   ‘Ayn Ḩalāqīm            SY  {'lon': 36.321911, 'lat': 34.940079}
2  3245.0         Taglag            IR   {'lon': 44.98333, 'lat': 38.450001}
3  3530.0       Qabāghlū            IR  {'lon': 46.168499, 'lat': 36.173302}
4  5174.0        ‘Arīqah            SY   {'lon': 36.48336, 'lat': 32.889809}
Total cities: 209579


Total major cities selected: 0

✔ COMPLETED!
File saved as: global_major_cities_weather.csv


In [9]:
import requests
import pandas as pd
from datetime import datetime
import time
import json
import gzip

API_KEY = "eee0f55dccceb67630a015d0577f12cb"

CITY_FILE = r"D:\\codes\\brain tumor project yz\\infosys\\city.list.json.gz"

# ---- Load city list ----
with gzip.open(CITY_FILE, "rt", encoding="utf-8") as f:
    cities = json.load(f)

# ---- List of major global cities ----
TOP_CITIES = [
    "Tokyo", "Delhi", "Shanghai", "São Paulo", "Mumbai", "Mexico City", "Osaka",
    "Cairo", "New York", "Beijing", "Karachi", "Buenos Aires", "Istanbul",
    "Manila", "Lagos", "Rio de Janeiro", "Los Angeles", "Moscow", "Paris",
    "London", "Dhaka", "Jakarta", "Bangkok", "Dubai", "Seoul", "Hong Kong",
    "Chennai", "Hyderabad", "Bengaluru", "Kolkata", "Sydney", "Melbourne"
]

# Match these city names with the list → get their city IDs
selected_ids = []
for city in cities:
    if city["name"] in TOP_CITIES:
        selected_ids.append((city["id"], city["name"], city["country"]))

print(f"Selected cities found: {len(selected_ids)}")

weather_data = []

# ---- Fetch Weather ----
for i, (city_id, name, country) in enumerate(selected_ids):
    print(f"Fetching {i+1}/{len(selected_ids)} → {name}, {country}")

    url = f"https://api.openweathermap.org/data/2.5/weather?id={city_id}&appid={API_KEY}&units=metric"
    
    try:
        res = requests.get(url)
        data = res.json()

        if "main" in data:
            weather_data.append({
                "City": name,
                "Country": country,
                "Latitude": data["coord"]["lat"],
                "Longitude": data["coord"]["lon"],
                "Temperature (°C)": data["main"]["temp"],
                "Humidity (%)": data["main"]["humidity"],
                "Wind Speed (m/s)": data["wind"]["speed"],
                "Weather": data["weather"][0]["description"],
                "Timestamp": datetime.utcnow()
            })
    except Exception as e:
        print("Error:", e)

    time.sleep(1)

# ---- Save CSV ----
df = pd.DataFrame(weather_data)
df.to_csv("global_major_cities_weather.csv", index=False)

print("✔ Completed!")
print("Saved: global_major_cities_weather.csv")


Selected cities found: 91
Fetching 1/91 → Dubai, AE
Fetching 2/91 → Dubai, AE
Fetching 3/91 → Cairo, EG
Fetching 4/91 → Moscow, RU
Fetching 5/91 → Moscow, RU
Fetching 6/91 → Istanbul, TR
Fetching 7/91 → Karachi, PK
Fetching 8/91 → Hyderabad, PK
Fetching 9/91 → Dhaka, BD
Fetching 10/91 → Chennai, IN
Fetching 11/91 → Hyderabad, IN
Fetching 12/91 → Delhi, IN
Fetching 13/91 → Kolkata, IN
Fetching 14/91 → Mumbai, IN
Fetching 15/91 → Bengaluru, IN
Fetching 16/91 → Dhaka, BD
Fetching 17/91 → Bangkok, TH
Fetching 18/91 → Bangkok, TH
Fetching 19/91 → Jakarta, ID
Fetching 20/91 → Manila, PH
Fetching 21/91 → Los Angeles, PH
Fetching 22/91 → Shanghai, CN
Fetching 23/91 → Beijing, CN
Fetching 24/91 → Hong Kong, HK
Fetching 25/91 → Seoul, KR
Fetching 26/91 → Seoul, KR
Fetching 27/91 → Tokyo, JP
Fetching 28/91 → Osaka, JP
Fetching 29/91 → Osaka, JP
Fetching 30/91 → Sydney, AU
Fetching 31/91 → Melbourne, AU
Fetching 32/91 → Lagos, PT
Fetching 33/91 → Lagos, NG
Fetching 34/91 → Melbourne, GB
Fetching 3

In [None]:
import pandas as pd
import requests
from datetime import datetime
import time

# -----------------------------------------
# STEP 1 — Load the 30K global cities dataset
# -----------------------------------------
cities = pd.read_csv(
    "D:\codes\\brain tumor project yz\\infosys\\cities5000.txt",
    sep="\t",
    header=None,
    names=[
        "geonameid", "name", "asciiname", "alternatenames",
        "latitude", "longitude", "feature_class", "feature_code",
        "country_code", "cc2", "admin1", "admin2", "admin3",
        "admin4", "population", "elevation", "dem",
        "timezone", "modification"
    ],
    dtype=str
)

# Keep only useful columns
cities = cities[["name", "country_code", "latitude", "longitude", "population"]]

# Keep cities with population available
cities = cities.dropna(subset=["latitude", "longitude"])

# Convert latitude/longitude
cities["latitude"] = cities["latitude"].astype(float)
cities["longitude"] = cities["longitude"].astype(float)

# Keep top 30,000 cities
cities = cities.head(30000)

print("Total Cities Loaded:", len(cities))

# -----------------------------------------
# STEP 2 — Fetch Weather from Open-Meteo
# -----------------------------------------
weather_data = []

def get_weather(lat, lon):
    url = f"https://api.open-meteo.com/v1/forecast?latitude={lat}&longitude={lon}&current_weather=true"

    try:
        res = requests.get(url, timeout=10)
        data = res.json()

        if "current_weather" not in data:
            return None

        w = data["current_weather"]

        return {
            "Temperature (°C)": w.get("temperature"),
            "Wind Speed (m/s)": w.get("windspeed"),
            "Wind Direction (°)": w.get("winddirection"),
            "Timestamp": datetime.utcnow()
        }
    except:
        return None

# -----------------------------------------
# STEP 3 — Loop cities with safe rate
# -----------------------------------------
count = 0

for idx, row in cities.iterrows():
    city = row["name"]
    country = row["country_code"]
    lat = row["latitude"]
    lon = row["longitude"]

    weather = get_weather(lat, lon)

    if weather:
        record = {
            "City": city,
            "Country": country,
            "Latitude": lat,
            "Longitude": lon,
            "Temperature (°C)": weather["Temperature (°C)"],
            "Humidity (%)": None,           # free API does not provide (optional field)
            "Wind Speed (m/s)": weather["Wind Speed (m/s)"],
            "Wind Direction (°)": weather["Wind Direction (°)"],
            "Timestamp": weather["Timestamp"],
        }

        weather_data.append(record)
        count += 1

    if count % 100 == 0:
        print(f"Fetched weather for {count} cities...")

    time.sleep(0.2)  # avoid too many requests

# -----------------------------------------
# STEP 4 — Save to CSV
# -----------------------------------------
df_weather = pd.DataFrame(weather_data)
df_weather.to_csv("global_30k_weather.csv", index=False)

print("\n✔ COMPLETED!")
print("Total cities with weather data:", len(df_weather))
print("File saved as: global_30k_weather.csv")


Total Cities Loaded: 30000
Fetched weather for 100 cities...
Fetched weather for 200 cities...
Fetched weather for 300 cities...
Fetched weather for 400 cities...
Fetched weather for 500 cities...
Fetched weather for 600 cities...
Fetched weather for 700 cities...
Fetched weather for 800 cities...
Fetched weather for 900 cities...
Fetched weather for 1000 cities...
Fetched weather for 1100 cities...
Fetched weather for 1200 cities...
Fetched weather for 1300 cities...
Fetched weather for 1400 cities...
Fetched weather for 1500 cities...
Fetched weather for 1600 cities...
Fetched weather for 1700 cities...
Fetched weather for 1800 cities...
Fetched weather for 1900 cities...
Fetched weather for 2000 cities...
Fetched weather for 2100 cities...
Fetched weather for 2200 cities...
Fetched weather for 2300 cities...
Fetched weather for 2400 cities...
Fetched weather for 2500 cities...
Fetched weather for 2600 cities...
Fetched weather for 2700 cities...
Fetched weather for 2800 cities...
Fe