In [1]:
import json
import os
from datetime import datetime

import boto3
import pandas as pd
import requests
from botocore.exceptions import NoCredentialsError, PartialCredentialsError
# NOTE(review): importing display from IPython.core.display is deprecated
# (this cell emits a warning for it); IPython.display is the public location.
from IPython.core.display import display,HTML

from extract_weather_data import weather_data

  from IPython.core.display import display,HTML


In [None]:
def get_weather_api(city="London", timeout=10):
    """Fetch the current weather for ``city`` from the Weatherstack API.

    Parameters
    ----------
    city : str, optional
        Location sent as the ``query`` parameter. Defaults to ``"London"``,
        the value that used to be hard-coded.
    timeout : float, optional
        Seconds to wait for the server before giving up. Without a timeout
        ``requests`` can block indefinitely. Defaults to 10.

    Returns
    -------
    dict or None
        The decoded JSON payload, or ``None`` when the request fails, the
        body is not valid JSON, or the payload itself reports an error.

    Notes
    -----
    Reads the module-level ``weather_access_key``, which must be defined
    before this function is called (ideally loaded from the environment in
    a configuration cell rather than hard-coded).
    NOTE(review): the key is sent over plain HTTP - confirm whether the
    account in use supports the HTTPS endpoint.
    """
    url = "http://api.weatherstack.com/current"
    querystring = {"access_key": weather_access_key, "query": city}

    try:
        response = requests.get(url, params=querystring, timeout=timeout)
        response.raise_for_status()
        payload = response.json()
    except requests.exceptions.RequestException as e:
        print(f"Request failed: {e}")
        return None
    except ValueError as e:
        # Older versions of requests raise a plain ValueError for a non-JSON body.
        print(f"Request failed: {e}")
        return None

    # The API can answer HTTP 200 with {"success": false, "error": {...}};
    # raise_for_status() does not catch that, so check the payload as well.
    if isinstance(payload, dict) and payload.get("success") is False:
        print(f"Request failed: {payload.get('error')}")
        return None

    return payload

In [None]:

def create_dataframe(response):
    """Flatten one weather API response into a single-row pandas DataFrame.

    Parameters
    ----------
    response : dict
        Decoded API payload. The keys ``request``, ``location`` and
        ``current`` are read; a missing key raises ``KeyError``.

    Returns
    -------
    pandas.DataFrame
        One row with the columns city, local_time, observation_time,
        temperature, weather_description, wind_speed, wind_degree,
        precipitation, humidity, cloudcover, feelslike, uv_index and
        visibility.
    """
    current = response["current"]
    descriptions = current["weather_descriptions"]

    record = {
        "city": response["request"]["query"],
        "local_time": response["location"]["localtime"],
        "observation_time": current["observation_time"],
        "temperature": current["temperature"],
        # Guard against an empty description list instead of raising IndexError.
        "weather_description": descriptions[0] if descriptions else None,
        "wind_speed": current["wind_speed"],
        "wind_degree": current["wind_degree"],
        "precipitation": current["precip"],
        "humidity": current["humidity"],
        "cloudcover": current["cloudcover"],
        "feelslike": current["feelslike"],
        "uv_index": current["uv_index"],
        "visibility": current["visibility"],
    }

    # A dict of scalars must be wrapped in a list: pd.DataFrame(record) raises
    # "If using all scalar values, you must pass an index".
    weather_df = pd.DataFrame([record])

    return weather_df

In [None]:

def standardise_weather_df_columns(weather_df):
    """Standardise the columns of the weather DataFrame.

    - Splits 'local_time' ("YYYY-MM-DD HH:MM") into 'localDate' and
      'localTime' columns and drops 'local_time'.
    - Converts 'observation_time' from 12-hour format ("07:17 PM") to
      24-hour format ("19:17").
    - Converts 'wind_speed' from km/h to miles per hour.

    Parameters
    ----------
    weather_df : pandas.DataFrame
        Frame with at least the columns 'local_time', 'observation_time'
        and 'wind_speed'.

    Returns
    -------
    pandas.DataFrame
        A transformed copy; the input frame is left untouched so the cell
        can be re-run safely.

    Notes
    -----
    NOTE(review): the previous expression was ``wind_speed * 1000 / 1.609``,
    which is neither metres per hour nor miles per hour. The 1.609 factor
    (km per mile) suggests miles per hour was intended - confirm.
    """
    km_per_mile = 1.609344

    standardised = weather_df.copy()

    # Split local_time into date and time columns. Element access via .str
    # yields NaN when a part is missing instead of raising.
    date_time_parts = standardised["local_time"].str.split(" ", n=1)
    standardised["localDate"] = date_time_parts.str[0]
    standardised["localTime"] = date_time_parts.str[1]
    standardised = standardised.drop(columns=["local_time"])

    # Change observation time from 12 hours to 24 hours. The values carry no
    # seconds component, so the format is hours:minutes plus AM/PM.
    observed_at = pd.to_datetime(standardised["observation_time"], format="%I:%M %p")
    standardised["observation_time"] = observed_at.dt.strftime("%H:%M")

    # Convert the wind speed from km/h to mph.
    standardised["wind_speed"] = standardised["wind_speed"] / km_per_mile

    return standardised

In [None]:
def upload_to_bucket(weather_data, bucket="weather_data", key="raw_weather_data"):
    """Upload the raw weather payload to S3 as a JSON object.

    Parameters
    ----------
    weather_data : dict, list, str, bytes or None
        Payload to store. ``str`` is UTF-8 encoded, ``bytes`` is sent as is,
        and anything else is serialised with ``json.dumps`` first. ``None``
        skips the upload.
    bucket : str, optional
        Target bucket. Defaults to the previously hard-coded name.
        NOTE(review): S3 bucket naming rules do not allow underscores, so
        "weather_data" is likely to be rejected - confirm the real name.
    key : str, optional
        Object key. Defaults to the previously hard-coded "raw_weather_data".

    Returns
    -------
    None
        Success and failure are reported via ``print`` only.

    Raises
    ------
    TypeError
        If ``weather_data`` cannot be serialised to JSON.

    Notes
    -----
    Reads the module-level ``aws_access_key`` and ``aws_secret_key``, which
    must be defined before the call (load them from the environment or a
    secrets store, never hard-code them in the notebook).
    """
    if weather_data is None:
        print("No weather data to upload.")
        return

    # The caller passes the decoded API response (a dict), which has no
    # .encode(); normalise every supported input to bytes here.
    if isinstance(weather_data, bytes):
        body = weather_data
    elif isinstance(weather_data, str):
        body = weather_data.encode('utf-8')
    else:
        body = json.dumps(weather_data).encode('utf-8')

    # Resolve the credentials outside the try block so that a missing
    # configuration variable surfaces as a NameError instead of being
    # reported as an upload failure.
    access_key_id = aws_access_key
    secret_access_key = aws_secret_key

    try:
        # Client creation sits inside the try so that credential errors
        # raised while building the client are handled below too.
        s3_client = boto3.client(
            's3',
            aws_access_key_id=access_key_id,
            aws_secret_access_key=secret_access_key,
            region_name='eu-west-2',
        )
        s3_client.put_object(
            Bucket=bucket,
            Key=key,
            Body=body,
            ContentType='application/json',
        )
    except PartialCredentialsError:
        print("Partial credentials provided, please check your access key and secret key")
    except NoCredentialsError:
        print("Credentials not available or incorrect")
    except Exception as e:
        print(f"Upload to s3://{bucket}/{key} failed: {e}")

In [None]:
def fetch_and_process_data():
    """Run the pipeline: fetch, build the DataFrame, upload raw data, transform.

    Returns
    -------
    object or None
        Whatever ``standardise_weather_df_columns`` returns for the fetched
        data, or ``None`` when ``get_weather_api`` returned ``None``
        (previously that case raised ``UnboundLocalError``).
    """
    weather_response = get_weather_api()
    if weather_response is None:
        # Nothing was fetched, so there is nothing to upload or transform.
        return None

    weather_df = create_dataframe(weather_response)
    # The raw response is uploaded before any transformation is applied.
    upload_to_bucket(weather_response)
    return standardise_weather_df_columns(weather_df)


In [4]:
# Hard-coded example payload with the request / location / current layout
# that create_dataframe() reads.
json_response = {
    "request": {
        "type": "City",
        "query": "Leicester, United Kingdom",
        "language": "en",
        "unit": "m",
    },
    "location": {
        "name": "Leicester",
        "country": "United Kingdom",
        "region": "Leicestershire",
        "lat": "52.635",
        "lon": "-1.137",
        "timezone_id": "Europe/London",
        "localtime": "2024-07-09 20:17",
        "localtime_epoch": 1720556220,
        "utc_offset": "1.0",
    },
    "current": {
        "observation_time": "07:17 PM",
        "temperature": 20,
        "weather_code": 116,
        "weather_icons": [
            "https://cdn.worldweatheronline.com/images/wsymbols01_png_64/wsymbol_0002_sunny_intervals.png"
        ],
        "weather_descriptions": ["Partly cloudy"],
        "wind_speed": 15,
        "wind_degree": 170,
        "wind_dir": "S",
        "pressure": 1007,
        "precip": 0,
        "humidity": 83,
        "cloudcover": 75,
        "feelslike": 20,
        "uv_index": 4,
        "visibility": 10,
        "is_day": "yes",
    },
}



The JSON response above contains detailed weather information, but a single response is only a snapshot of current conditions. By collecting and aggregating responses over time or from multiple locations, you can perform a range of data analyses and derive useful insights. Below are some of the potential analyses:

1. Descriptive Statistics
Temperature Trends: Analyze daily, weekly, and monthly average temperatures to identify trends.
Humidity Levels: Track average, minimum, and maximum humidity levels over time.
Wind Speed and Direction: Analyze the distribution and trends of wind speed and direction.

2. Weather Patterns and Anomalies
Weather Conditions Frequency: Calculate the frequency of different weather descriptions (e.g., 'Partly cloudy', 'Sunny').
Anomaly Detection: Identify unusual weather patterns, such as sudden drops in temperature or unexpected precipitation.

3. Time Series Analysis
Seasonal Trends: Identify seasonal patterns in temperature, humidity, and other weather parameters.
Moving Averages: Compute moving averages for smoothing temperature and humidity data to understand long-term trends.

4. Correlation Analysis
Correlation Between Parameters: Study correlations between different weather parameters, such as temperature and humidity, or wind speed and cloud cover.
Impact on Visibility: Analyze how weather conditions (e.g., humidity, precipitation) affect visibility.

5. Comparative Analysis
Comparison Across Locations: If data from multiple locations is available, compare weather conditions across different cities or regions.
Historical Comparison: Compare current weather data with historical data to understand changes and trends over the years.

6. Predictive Analysis
Weather Forecasting: Use historical weather data to build predictive models for forecasting future weather conditions.
Trend Extrapolation: Extrapolate current trends to predict future weather patterns.

7. Geospatial Analysis
Mapping Weather Conditions: Visualize weather data on a map to see spatial distribution and patterns.
Heatmaps: Create heatmaps for temperature, humidity, and other parameters to identify hotspots.

8. Health and Safety Insights
UV Index Analysis: Analyze the UV index to understand the potential risk to skin from sun exposure.
Weather Impact on Health: Study how different weather conditions (e.g., high humidity, low temperatures) correlate with health issues or emergency service calls.

9. Climate Change Analysis
Long-term Trends: Track long-term trends in temperature, precipitation, and other parameters to study the impact of climate change.
Extreme Weather Events: Analyze the frequency and intensity of extreme weather events over time.