In [1]:
import mysql.connector
from mysql.connector import Error
import os
import logging
from datetime import datetime
from tqdm import tqdm

In [2]:
# Database configuration. Each value is read from an environment variable so
# that real credentials do not have to be stored in the notebook; the literal
# fallbacks are the local-development defaults used when the variable is unset.
# Override with MYSQL_HOST, MYSQL_USER, MYSQL_PASSWORD and MYSQL_DATABASE.
db_config = {
    'host': os.environ.get('MYSQL_HOST', 'localhost'),
    'user': os.environ.get('MYSQL_USER', 'root'),
    'password': os.environ.get('MYSQL_PASSWORD', 'root@123'),
    'database': os.environ.get('MYSQL_DATABASE', 'corteva'),
}

In [3]:
def weather_data_statistics(db_config):
    '''Compute yearly per-station weather statistics and upsert them.

    Reads the ``weather_data`` table and, for every (station_id, year) group,
    calculates the average ``max_temp``, the average ``min_temp`` and the
    total ``precipitation``. Rows in which any of those three columns is NULL
    are excluded from all three aggregates. Each result row is inserted into
    ``weather_statistics`` in the same database; on a duplicate key the three
    statistic columns are overwritten with the new values. All rows are
    committed together after the loop finishes.

    Args:
        db_config: Mapping of keyword arguments forwarded to
            ``mysql.connector.connect``.

    Returns:
        None. Progress and outcome are reported on stdout.

    Errors of type ``mysql.connector.Error`` are caught and printed, not
    re-raised.
    '''
    # The same statement is used for every row, so build it once.
    upsert_query = """
    INSERT INTO weather_statistics (station_id, year, avg_max_temp, avg_min_temp, total_precipitation)
    VALUES (%s, %s, %s, %s, %s)
    ON DUPLICATE KEY UPDATE avg_max_temp=VALUES(avg_max_temp),
                            avg_min_temp=VALUES(avg_min_temp),
                            total_precipitation=VALUES(total_precipitation)
    """

    # Bind both handles before the try block: if connect() or cursor() raises,
    # the finally clause must still be able to inspect them without hitting an
    # UnboundLocalError that would hide the original MySQL error.
    conn = None
    cursor = None
    try:
        conn = mysql.connector.connect(**db_config)
        if conn.is_connected():
            cursor = conn.cursor()

            # Calculate yearly statistics for each station, excluding missing data
            query = """
            SELECT station_id, 
                   YEAR(date) as year, 
                   AVG(max_temp) as avg_max_temp, 
                   AVG(min_temp) as avg_min_temp, 
                   SUM(precipitation) as total_precipitation
            FROM weather_data
            WHERE max_temp IS NOT NULL AND 
            min_temp IS NOT NULL AND 
            precipitation IS NOT NULL
            GROUP BY station_id, YEAR(date)
            """
            cursor.execute(query)
            results = cursor.fetchall()

            # Insert or update statistics in weather_statistics with tqdm progress bar
            for row in tqdm(results, desc="Updating statistics"):
                station_id, year, avg_max_temp, avg_min_temp, total_precipitation = row
                cursor.execute(
                    upsert_query,
                    (station_id, year, avg_max_temp, avg_min_temp, total_precipitation),
                )

            # Single commit so the statistics table is updated as one unit.
            conn.commit()
            print("Statistics successfully updated.")
    except Error as e:
        print("Error while connecting to MySQL", e)
    finally:
        # Only tear down what was actually created.
        if conn is not None and conn.is_connected():
            if cursor is not None:
                cursor.close()
            conn.close()
            print("MySQL connection is closed")

In [4]:
# Entry point: when this code runs as the main module, compute the yearly
# statistics and upsert them using the connection settings in db_config.
if __name__ == "__main__":
    weather_data_statistics(db_config)

Updating statistics: 100%|████████████████████████████████████████████████████████████████████| 4791/4791 [00:02<00:00, 2190.21it/s]


Statistics successfully updated.
MySQL connection is closed
