In [None]:
pip install geopy


In [1]:
import pandas as pd
import numpy as np
import requests
from geopy.distance import geodesic
from datetime import datetime

# ---------------------------------------------------------------------------
# Trip CO₂ pipeline
#   1. Ask the export endpoint for the URL of a JSON file and download it.
#   2. Clean the records (numeric coercion, missing-value handling).
#   3. Derive distance, duration and a final CO₂ figure per record.
#   4. POST each final CO₂ figure back to the update endpoint.
# Every failure is reported with print() and the cell finishes normally.
# ---------------------------------------------------------------------------

# Configuration
api_url = "http://192.168.82.20:8080/api/export-data"
update_api_url = "http://192.168.82.20:8080/api/updateemition"
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests can wait forever
DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S"

# Example weights used by the final CO₂ formula (per unit of each counter).
GPRS_CO2_FACTOR = 50
CALL_CO2_FACTOR = 10
SMS_CO2_FACTOR = 1

numeric_cols = ['engine_size', 'transmission_type', 'fuel_type', 'milage', 'co2', 'gprs', 'sms', 'phone', 'id', 'user_id']
id_cols = ['id', 'user_id']
coord_cols = ['from_lat', 'from_long', 'to_lat', 'to_long']
time_cols = ['start_date_time', 'end_date_time']
# Numeric columns where a missing value is treated as 0. Identifiers are
# deliberately excluded: a missing id must never be turned into id 0.
measurement_cols = [col for col in numeric_cols if col not in id_cols]
required_cols = numeric_cols + coord_cols + time_cols


def calculate_distance(row):
    """Return the distance in kilometres between a row's start and end point.

    The distance comes from geopy's ``geodesic`` (an ellipsoidal model, not the
    haversine formula).

    Args:
        row: Mapping/Series with 'from_lat', 'from_long', 'to_lat', 'to_long'.

    Returns:
        Distance in km, or NaN when a coordinate is missing or geopy rejects it.
    """
    start = (row['from_lat'], row['from_long'])
    end = (row['to_lat'], row['to_long'])
    if any(pd.isna(value) for value in start + end):
        return np.nan
    try:
        return geodesic(start, end).km
    except ValueError:
        # geopy refuses invalid coordinates (e.g. latitude outside [-90, 90]).
        return np.nan


def calculate_duration(row):
    """Return the time between 'start_date_time' and 'end_date_time' in hours.

    Args:
        row: Mapping/Series whose two timestamp fields are strings in
            DATETIME_FORMAT.

    Returns:
        Duration in hours, or NaN when a timestamp is missing or malformed.
    """
    try:
        start_time = datetime.strptime(row["start_date_time"], DATETIME_FORMAT)
        end_time = datetime.strptime(row["end_date_time"], DATETIME_FORMAT)
    except (TypeError, ValueError):
        # TypeError: value is not a string (e.g. NaN for a missing timestamp).
        # ValueError: the string does not match DATETIME_FORMAT.
        return np.nan
    return (end_time - start_time).total_seconds() / 3600  # seconds -> hours


def calculate_final_co2(row):
    """Return the final CO₂ figure for one record.

    Formula: co2 * duration_hours + gprs * 50 + phone * 10 + sms * 1.
    The result is NaN when any input is NaN (e.g. unknown duration).

    Args:
        row: Mapping/Series with 'co2', 'duration_hours', 'gprs', 'phone', 'sms'.
    """
    base_co2 = row['co2'] * row['duration_hours']
    gprs_factor = float(row['gprs']) * GPRS_CO2_FACTOR
    call_factor = float(row['phone']) * CALL_CO2_FACTOR
    sms_factor = float(row['sms']) * SMS_CO2_FACTOR
    return base_co2 + gprs_factor + call_factor + sms_factor


# Step 1: Fetch Data
# Reset first so a failed run never leaves a frame from an earlier execution.
df = None
try:
    response = requests.get(api_url, timeout=REQUEST_TIMEOUT)
    if response.status_code != 200:
        print(f"Failed to fetch data. Status Code: {response.status_code}")
    else:
        api_data = response.json()
        file_url = api_data.get("file_url") if isinstance(api_data, dict) else None

        if not file_url:
            print("Error: No file URL found in the API response.")
        else:
            file_response = requests.get(file_url, timeout=REQUEST_TIMEOUT)
            if file_response.status_code != 200:
                print(f"Failed to download JSON file. Status Code: {file_response.status_code}")
            else:
                json_data = file_response.json()

                # Ensure JSON is in list format
                if isinstance(json_data, dict):
                    json_data = [json_data]

                df = pd.DataFrame(json_data)
except ValueError as e:
    # Listed before RequestException on purpose: JSON decoding errors and
    # invalid-URL errors are ValueErrors and keep this message.
    print(f"Error processing API response: {e}")
except requests.RequestException as e:
    # Connection failures, timeouts and other transport-level problems.
    print(f"Request failed: {e}")

if df is not None:
    missing_cols = [col for col in required_cols if col not in df.columns]

    if df.empty:
        print("No records found in the exported data.")
    elif missing_cols:
        print(f"Error: Exported data is missing required columns: {missing_cols}")
    else:
        # Step 2: Handle Missing Data
        df = df.replace("null", np.nan)

        # Coerce first, fill afterwards: coercion turns unparseable values
        # into NaN, so filling before it would leave NaN behind.
        for col in numeric_cols + coord_cols:
            df[col] = pd.to_numeric(df[col], errors='coerce')
        df[measurement_cols] = df[measurement_cols].fillna(0)
        # Coordinates, timestamps and identifiers stay missing when they are
        # missing; the steps below propagate NaN and the upload skips the row.

        # Step 3: Calculate Travel Distance (geodesic, in km)
        df['distance_km'] = df.apply(calculate_distance, axis=1)

        # Step 4: Calculate Duration in Hours
        df['duration_hours'] = df.apply(calculate_duration, axis=1)

        # Step 5: Compute Final CO₂ Calculation
        df['final_co2'] = df.apply(calculate_final_co2, axis=1)

        # Step 6: Send Updated Emission Data to API
        for _, row in df.iterrows():
            if pd.isna(row['id']) or pd.isna(row['user_id']) or not np.isfinite(row['final_co2']):
                print(f"Skipping record with incomplete data (ID: {row['id']})")
                continue

            payload = {
                "id": int(row['id']),
                "user_id": int(row['user_id']),
                "co2": float(row['final_co2'])
            }
            try:
                update_response = requests.post(update_api_url, json=payload, timeout=REQUEST_TIMEOUT)
            except requests.RequestException as e:
                # One unreachable request must not abort the remaining updates.
                print(f"Failed to update emission for ID: {payload['id']}. Error: {e}")
                continue

            if update_response.status_code == 200:
                print(f"Successfully updated emission for ID: {payload['id']}")
            else:
                print(f"Failed to update emission for ID: {payload['id']}. Status Code: {update_response.status_code}")


Failed to fetch data. Status Code: 404
