In [2]:
# Upload the input CSV into the Colab runtime. The recorded output of this cell
# shows `wheatherdata.csv` being saved; a later cell reads it back from disk by path.
from google.colab import files
# NOTE: `uploaded` is not referenced again in the visible cells.
uploaded = files.upload()

Saving wheatherdata.csv to wheatherdata.csv


In [3]:
!pip install plotly




In [15]:
import pandas as pd
import plotly.express as px
from datetime import datetime
import ast

# Load the dataset
# NOTE: the file name (including its "wheatherdata" spelling) has to match the file
# that was uploaded; the recorded upload output shows that exact name.
# NOTE(review): /content is presumably the Colab working directory — confirm before
# running this notebook anywhere else.
file_path = '/content/wheatherdata.csv'  # Update this with your file path
data = pd.read_csv(file_path)

# Parse JSON-like columns
def parse_json_column(column):
    """Parse a column of dict-like strings into Python objects.

    Every non-null cell is first read as a Python literal with
    ``ast.literal_eval`` (this handles single-quoted dict reprs). A cell that
    is not a valid Python literal but is valid JSON (for example one that
    contains ``true``, ``false`` or ``null``) is parsed with ``json.loads``
    instead of aborting the whole column.

    Parameters
    ----------
    column : pandas.Series
        Series whose cells are strings or null.

    Returns
    -------
    pandas.Series
        The parsed objects, with ``None`` in place of null cells.

    Raises
    ------
    ValueError, SyntaxError
        The original ``ast.literal_eval`` error, when a cell is neither a
        valid Python literal nor valid JSON.
    """
    import json  # local import: only the fallback path needs it

    def _parse_cell(cell):
        if not pd.notnull(cell):
            return None
        try:
            return ast.literal_eval(cell)
        except (ValueError, SyntaxError) as literal_error:
            try:
                return json.loads(cell)
            except (ValueError, TypeError):
                # Neither syntax matched: surface the same error type as before.
                raise literal_error from None

    return column.apply(_parse_cell)

def _add_weather_columns(frame):
    """Add the parsed and extracted weather columns to `frame` in place.

    Columns are created in this order: the two parsed dict columns, the fields
    read out of them, and finally the timestamp derived from the epoch seconds.
    """
    # Decode the two dict-like text columns.
    for raw_column in ('location', 'current'):
        frame[raw_column + '_parsed'] = parse_json_column(frame[raw_column])

    def _getter(key):
        # Read `key` from a parsed cell; falsy cells (None, empty) give None.
        def _read(parsed):
            if parsed:
                return parsed.get(key)
            return None
        return _read

    # target column -> (parsed source column, key inside the parsed dict)
    field_sources = {
        'name': ('location_parsed', 'name'),
        'region': ('location_parsed', 'region'),
        'country': ('location_parsed', 'country'),
        'last_updated_epoch': ('current_parsed', 'last_updated_epoch'),
        'temperature': ('current_parsed', 'temp_c'),
    }
    for target, (source, key) in field_sources.items():
        frame[target] = frame[source].apply(_getter(key))

    # Epoch seconds -> pandas timestamps.
    frame['last_updated'] = pd.to_datetime(frame['last_updated_epoch'], unit='s')
    return frame


_add_weather_columns(data)

# Keep only the analysis columns and discard rows with any missing value.
_analysis_columns = ['name', 'region', 'country', 'last_updated', 'temperature']
data_cleaned = data[_analysis_columns].dropna()

# List unique locations for selection
unique_locations = data_cleaned['name'].unique()
print(f"Available Locations: {unique_locations}")

# Specify the location to visualize (modify here or make dynamic)
selected_location = "Ahmedabad"  # Replace with any location from the printed list

# A misspelled or absent location would otherwise just yield an empty chart,
# so say so explicitly.
if selected_location not in set(unique_locations):
    print(f"Warning: no rows found for location {selected_location!r}; the chart will be empty.")

# Filter data for the selected location and order it chronologically:
# a line chart connects points in row order, so unsorted timestamps would
# draw a zig-zag instead of a trend.
filtered_data = (
    data_cleaned[data_cleaned['name'] == selected_location]
    .sort_values('last_updated')
)

# Interactive visualization with Plotly: one line (with markers) of temperature
# over time for the selected location, styled in a single chained expression.
_trend_title = f"Interactive Temperature Trend for {selected_location}"
_trend_labels = {'last_updated': 'Timestamp', 'temperature': 'Temperature (°C)'}

fig = (
    px.line(
        filtered_data,
        x='last_updated',
        y='temperature',
        title=_trend_title,
        labels=_trend_labels,
        markers=True,
    )
    .update_traces(line={'color': "royalblue", 'width': 2})
    .update_layout(
        title_font_size=20,
        xaxis={'title': "Time", 'showgrid': True},
        yaxis={'title': "Temperature (°C)", 'showgrid': True},
        template="plotly_dark",
    )
)
fig.show()

# Aggregated Insights (Optional)
# Per-region summary of the temperature column: mean, minimum and maximum.
# output column -> (source column, aggregation)
_temperature_stats = {
    'average_temperature': ('temperature', 'mean'),
    'min_temperature': ('temperature', 'min'),
    'max_temperature': ('temperature', 'max'),
}
_by_region = data_cleaned.groupby('region')
aggregated_data = _by_region.agg(**_temperature_stats).reset_index()

print("\nAggregated Temperature Insights by Region:")
print(aggregated_data)


Available Locations: ['Abohar' 'Adoni' 'Agartala' 'Agra' 'Ahmedabad' 'Ahmednagar' 'Aizawl'
 'Ajmer' 'Akola' 'Alappuzha' 'Aligarh' 'Alwar' 'Amaravati' 'Ambarnath'
 'Ambattur' 'Amravati' 'Amritsar' 'Amroha' 'Anand' 'Anantapur' 'Anantnag'
 'Arrah' 'Asansol' 'Aurangabad' 'Avadi' 'Bahraich' 'Ballia' 'Bally'
 'Bangalore' 'Baranagar' 'Barasat' 'Bardhaman' 'Bareilly' 'Barnala'
 'Batala' 'Begusarai' 'Belgaum' 'Bellary' 'Berhampore' 'Berhampur'
 'Bettiah' 'Bhagalpur' 'Pur Pur' 'Bharatpur' 'Bhatpara' 'Bhavnagar'
 'Bhilai' 'Bhilwara' 'Bhimavaram' 'Bhind' 'Bhiwandi' 'Bhiwani' 'Bhopal'
 'Bhubaneswar' 'Bhusawal' 'Bidar' 'Bihar Sharif' 'Bijapur' 'Bikaner'
 'Bilaspur' 'Bokaro' 'Bongaigaon' 'Budaun' 'Bulandshahr' 'Burhanpur'
 'Buxar' 'Chandigarh' 'Chandrapur' 'Chennai' 'Chhapra' 'Chinsurah'
 'Chittoor' 'Coimbatore' 'Cuttack' 'Danapur' 'Darbhanga' 'Dehradun'
 'Dehri' 'Delhi' 'Deoghar' 'Dewas' 'Dhanbad' 'Dharmavaram' 'Dhule'
 'Dibrugarh' 'Dimapur' 'Dindigul' 'Durgapur' 'Eluru' 'Erode' 'Etawah'
 'Faridabad


Aggregated Temperature Insights by Region:
               region  average_temperature  min_temperature  max_temperature
0                                32.000000             32.0             32.0
1      Andhra Pradesh            29.077778             22.7             32.6
2               Assam            31.075000             24.0             34.1
3               Bihar            30.366667             28.0             32.8
4          Chandigarh            20.100000             20.1             20.1
5        Chhattisgarh            29.150000             28.4             29.9
6             Gujarat            31.750000             30.0             34.0
7             Haryana            23.600000             22.3             24.1
8   Jammu and Kashmir            15.900000             15.9             15.9
9           Jharkhand            28.600000             27.6             29.4
10          Karnataka            32.666667             28.0             34.8
11             Kerala           

In [5]:
# Comparison of multiple states/regions
# Filter for specific states/regions to compare (modify this list as needed)
selected_states = ['Punjab', 'Uttar Pradesh', 'Gujarat']  # Replace with your desired states

# Report requested states that do not occur in the data (e.g. a spelling mismatch);
# they would otherwise be dropped from the chart without any hint.
_known_regions = set(data_cleaned['region'])
_missing_states = [state for state in selected_states if state not in _known_regions]
if _missing_states:
    print(f"Warning: no rows found for states {_missing_states}; they will not appear in the chart.")

# Order rows by region, location and time: each line is drawn in row order, so
# unsorted timestamps would produce zig-zag traces, and a fixed order keeps the
# legend stable between runs.
comparison_data = (
    data_cleaned[data_cleaned['region'].isin(selected_states)]
    .sort_values(['region', 'name', 'last_updated'])
)

# Interactive comparison plot: one line per location, coloured by its state/region.
fig = px.line(
    comparison_data,
    x='last_updated',
    y='temperature',
    color='region',
    line_group='name',
    title="Temperature Trends Across Selected States",
    labels={'last_updated': 'Timestamp', 'temperature': 'Temperature (°C)', 'region': 'State/Region'},
    markers=True,
)

# Customize the plot layout
fig.update_traces(line=dict(width=2))
fig.update_layout(
    title_font_size=20,
    xaxis=dict(title="Time", showgrid=True),
    yaxis=dict(title="Temperature (°C)", showgrid=True),
    legend_title="State/Region",
    template="plotly_dark",
)
fig.show()


In [16]:
import matplotlib.pyplot as plt  # required by the plotting branch below
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Prepare data for LSTM
location = 'Ahmedabad'
location_data = data_cleaned[data_cleaned['name'] == location]
# Order the readings chronologically so that the sliding windows and the
# train/test split both respect time.
location_data = location_data.sort_values('last_updated').set_index('last_updated')['temperature']

sequence_length = 10  # Use last 10 timestamps for prediction
train_fraction = 0.8

# Number of (window, next value) pairs the series can provide, and how many of
# them go to training. Computed up front so that an empty or very short series
# is rejected before any scaling or model code runs.
n_sequences = max(len(location_data) - sequence_length, 0)
train_size = int(n_sequences * train_fraction)

# Check if there is enough data for training (at least one training window)
if train_size == 0:
    print(f"Not enough data for location {location} with sequence length {sequence_length}. Skipping LSTM model.")
else:
    raw_values = location_data.values.reshape(-1, 1)

    # Normalize data. The scaler is fitted only on the readings covered by the
    # training windows (inputs and targets) so that no information from the
    # test period leaks into the preprocessing.
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaler.fit(raw_values[:train_size + sequence_length])
    scaled_data = scaler.transform(raw_values)

    # Create sequences: each sample is `sequence_length` consecutive readings,
    # the target is the reading that follows them.
    X = np.array([scaled_data[i:i + sequence_length] for i in range(n_sequences)])
    y = np.array([scaled_data[i + sequence_length] for i in range(n_sequences)])

    # Train-Test Split (chronological: earliest windows train, latest test)
    X_train, X_test = X[:train_size], X[train_size:]
    y_train, y_test = y[:train_size], y[train_size:]

    # Build LSTM Model
    model = Sequential([
        LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], 1)),
        LSTM(50, return_sequences=False),
        Dense(25),
        Dense(1),
    ])
    model.compile(optimizer='adam', loss='mean_squared_error')

    # Train the model
    model.fit(X_train, y_train, batch_size=16, epochs=20, verbose=0)

    # Make predictions and map them back to degrees
    predictions = model.predict(X_test)
    predictions = scaler.inverse_transform(predictions)

    # Plot predictions against the true values of the test period
    test_index = location_data.index[-len(y_test):]
    plt.figure(figsize=(10, 6))
    plt.plot(test_index, scaler.inverse_transform(y_test), label='True Values')
    plt.plot(test_index, predictions, label='Predictions', linestyle='--')
    plt.title(f"LSTM Predictions for {location}", fontsize=16)
    plt.xlabel("Time")
    plt.ylabel("Temperature (°C)")
    plt.legend()
    plt.show()

Not enough data for location Ahmedabad with sequence length 10. Skipping LSTM model.


In [21]:
from prophet import Prophet
import pandas as pd
import matplotlib.pyplot as plt

# Prepare data for Prophet
location = 'Chennai'
# Build the two-column frame (timestamp column `ds`, value column `y`) as a fresh
# object instead of renaming a filtered slice in place, and order it by time.
location_data = (
    data_cleaned.loc[data_cleaned['name'] == location, ['last_updated', 'temperature']]
    .rename(columns={'last_updated': 'ds', 'temperature': 'y'})
    .sort_values('ds')
)

# Print the length of the location data before fitting the model
print(f"Number of data points for {location}: {len(location_data)}")

# Determine which locations have enough data points (greater than 1).
location_counts = data_cleaned['name'].value_counts()
valid_locations = location_counts[location_counts > 1].index.tolist()
print(f"Locations with more than 1 data point: {valid_locations}")

# Check to see if the current location has enough data
if len(location_data) > 1:
    # Train Prophet Model
    prophet_model = Prophet()
    prophet_model.fit(location_data)

    # Forecast future values: the next 30 hours. The step is given as a Timedelta
    # because the 'H' alias is deprecated in recent pandas releases.
    future = prophet_model.make_future_dataframe(periods=30, freq=pd.Timedelta(hours=1))
    forecast = prophet_model.predict(future)

    # Plot forecast
    fig = prophet_model.plot(forecast)
    plt.title(f"Prophet Forecast for {location}", fontsize=16)
    plt.show()
else:
    print(f"Not enough data points for location: {location} to train the Prophet model. Try a different location from the list above.")

Number of data points for Chennai: 1
Locations with more than 1 data point: ['Durgapur', 'Aurangabad']
Not enough data points for location: Chennai to train the Prophet model. Try a different location from the list above.
