<a href="https://colab.research.google.com/github/saerarawas/AAI_634O_A11_202520/blob/main/week3/Practical_Exercise_Integrating_Weather_Data_Rawas_Saera.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Integrating Weather Data Into a
Sales Dataset Using APIs and MongoDB

In [103]:
import requests
import pandas as pd


**Step 1: Extract Sales Data from CSV**

The first step is to extract the sales data from the CSV file. This will serve as the main
dataset to which we will add weather data

In [104]:
import pandas as pd
import datetime
import random

# Generate some sample data
data = {
    'date': [datetime.date(2025, 1, i).strftime('%Y-%m-%d') for i in range(1, 11)],
    'product id': [random.randint(1000, 2000) for _ in range(10)],
    'sales amount': [round(random.uniform(50, 300), 2) for _ in range(10)],
    'store location': ['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix',
                       'Philadelphia', 'San Antonio', 'San Diego', 'Dallas', 'San Jose']
}

# Create DataFrame
sales_data = pd.DataFrame(data)

# Save DataFrame to CSV
sales_data.to_csv('sales_data.csv', index=False)

print("CSV file created successfully!")

# Display the first few rows of the DataFrame
sales_data.head()


CSV file created successfully!


Unnamed: 0,date,product id,sales amount,store location
0,2025-01-01,1877,155.17,New York
1,2025-01-02,1581,55.56,Los Angeles
2,2025-01-03,1054,256.73,Chicago
3,2025-01-04,1684,153.83,Houston
4,2025-01-05,1315,243.57,Phoenix


**Step 2: Fetch Weather Data from the API**

Next, use the OpenWeatherMap API to fetch weather data for each store location on the
corresponding transaction date.<br>API Setup:<br>
• Sign up for an OpenWeatherMap API key at OpenWeatherMap.<br>
• Use the requests library in Python to pull weather data.

In [105]:
def fetch_weather_data(city, date, api_key):
    base_url = f"https://api.openweathermap.org/data/2.5/weather?q={city}&appid={api_key}"
    response = requests.get(base_url)
    #print(response)
    data = response.json()
    #print(data)
    # Extract temperature, humidity, and weather description
    temperature = data['main']['temp'] - 273.15 # Convert from Kelvin to Celsius
    humidity = data['main']['humidity']
    weather_description = data['weather'][0]['description']
    return temperature, humidity, weather_description

In [106]:
import requests

def fetch_weather_data(city, date, api_key):
    base_url = f"https://api.openweathermap.org/data/2.5/weather?q={city}&appid={api_key}"
    response = requests.get(base_url)

    # Print the entire response for debugging purposes
    print(response.json())

    data = response.json()

    # Check if the 'main' key is in the response
    if 'main' in data:
        temperature = data['main']['temp'] - 273.15  # Convert from Kelvin to Celsius
        humidity = data['main']['humidity']
        weather_description = data['weather'][0]['description']
        return temperature, humidity, weather_description
    else:
        # Handle the case where 'main' key is not found
        print("Error: 'main' key not found in the response.")
        return None, None, None

# Example usage
api_key = "3159ae9ae876db0990fa9835fbc288dc"
api_key = "8324bcc3c6e36168d126a13e9d9de7f3"
temp, humidity, description = fetch_weather_data('New York', '2025-02-02', api_key)
if temp is not None:
    print(f"Temp: {temp:.2f}°C, Humidity: {humidity}%, Weather: {description}")
else:
    print("Failed to retrieve weather data.")

{'coord': {'lon': -74.006, 'lat': 40.7143}, 'weather': [{'id': 701, 'main': 'Mist', 'description': 'mist', 'icon': '50n'}], 'base': 'stations', 'main': {'temp': 276.61, 'feels_like': 273.04, 'temp_min': 275.12, 'temp_max': 277.91, 'pressure': 1011, 'humidity': 92, 'sea_level': 1011, 'grnd_level': 1009}, 'visibility': 10000, 'wind': {'speed': 4.12, 'deg': 230}, 'clouds': {'all': 0}, 'dt': 1738924634, 'sys': {'type': 2, 'id': 2037026, 'country': 'US', 'sunrise': 1738929575, 'sunset': 1738966850}, 'timezone': -18000, 'id': 5128581, 'name': 'New York', 'cod': 200}
Temp: 3.46°C, Humidity: 92%, Weather: mist


**Step 3: Combine Weather Data with Sales Data**

In [107]:
# Loop through each row of the sales_data dataframe, call the function and update the dataframe with weather data
for index, row in sales_data.iterrows():
    temp, humidity, description = fetch_weather_data(row["store location"], row["date"],api_key)
    sales_data.at[index, "Temperature (°C)"] = temp
    sales_data.at[index, "Humidity (%)"] = humidity
    sales_data.at[index, "Weather Description"] = description

{'coord': {'lon': -74.006, 'lat': 40.7143}, 'weather': [{'id': 701, 'main': 'Mist', 'description': 'mist', 'icon': '50n'}], 'base': 'stations', 'main': {'temp': 276.61, 'feels_like': 273.04, 'temp_min': 275.12, 'temp_max': 277.91, 'pressure': 1011, 'humidity': 92, 'sea_level': 1011, 'grnd_level': 1009}, 'visibility': 10000, 'wind': {'speed': 4.12, 'deg': 230}, 'clouds': {'all': 0}, 'dt': 1738924634, 'sys': {'type': 2, 'id': 2037026, 'country': 'US', 'sunrise': 1738929575, 'sunset': 1738966850}, 'timezone': -18000, 'id': 5128581, 'name': 'New York', 'cod': 200}
{'coord': {'lon': -118.2437, 'lat': 34.0522}, 'weather': [{'id': 701, 'main': 'Mist', 'description': 'mist', 'icon': '50n'}], 'base': 'stations', 'main': {'temp': 288, 'feels_like': 288.02, 'temp_min': 287.08, 'temp_max': 288.8, 'pressure': 1018, 'humidity': 95, 'sea_level': 1018, 'grnd_level': 998}, 'visibility': 8047, 'wind': {'speed': 6.17, 'deg': 190}, 'clouds': {'all': 100}, 'dt': 1738924318, 'sys': {'type': 2, 'id': 2075946

In [108]:
sales_data

Unnamed: 0,date,product id,sales amount,store location,Temperature (°C),Humidity (%),Weather Description
0,2025-01-01,1877,155.17,New York,3.46,92.0,mist
1,2025-01-02,1581,55.56,Los Angeles,14.85,95.0,mist
2,2025-01-03,1054,256.73,Chicago,-6.24,54.0,broken clouds
3,2025-01-04,1684,153.83,Houston,21.01,91.0,overcast clouds
4,2025-01-05,1315,243.57,Phoenix,13.86,34.0,broken clouds
5,2025-01-06,1910,71.19,Philadelphia,3.44,96.0,mist
6,2025-01-07,1425,191.31,San Antonio,20.65,91.0,overcast clouds
7,2025-01-08,1789,120.39,San Diego,14.41,91.0,mist
8,2025-01-09,1725,64.67,Dallas,12.23,82.0,overcast clouds
9,2025-01-10,1817,220.32,San Jose,9.78,89.0,broken clouds


**Step 4: Load the Integrated Data into MongoDB**

Finally, you will load the integrated sales and weather data into MongoDB for future analysis

In [109]:
!pip install pymongo
from pymongo import MongoClient
from datetime import datetime


connection_string="mongodb+srv://tsjannoun123:KufyyNNqnno0atX9@cluster0.sb8py.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0"
# Connect to the MongoDB Atlas cluster
client = MongoClient(connection_string)

# Access a specific database
db = client['weather']



In [110]:
# Access a the collection sales within the database
sales = db['sales']

#Load the sales data

# Convert DataFrame to dictionary format
sales_dict = sales_data.to_dict(orient="records")

# Insert the sales data into MongoDB
sales.insert_many(sales_dict)

InsertManyResult([ObjectId('67a5e2e9c1c63a760a0a3c40'), ObjectId('67a5e2e9c1c63a760a0a3c41'), ObjectId('67a5e2e9c1c63a760a0a3c42'), ObjectId('67a5e2e9c1c63a760a0a3c43'), ObjectId('67a5e2e9c1c63a760a0a3c44'), ObjectId('67a5e2e9c1c63a760a0a3c45'), ObjectId('67a5e2e9c1c63a760a0a3c46'), ObjectId('67a5e2e9c1c63a760a0a3c47'), ObjectId('67a5e2e9c1c63a760a0a3c48'), ObjectId('67a5e2e9c1c63a760a0a3c49')], acknowledged=True)