<a href="https://colab.research.google.com/github/madhansree005-coder/climatescope-project/blob/milestone-3/milestone%203...ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%%writefile app.py
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Browser-tab title and icon plus a full-width layout. Streamlit requires
# this to be the first Streamlit command executed by the script.
st.set_page_config(
    page_title="ClimateScopeüåç",
    page_icon="üå¶Ô∏è",
    layout="wide",
)

# Dashboard theme: gradient backgrounds for the main area and sidebar, white
# headings, translucent tab styling, and the `.insight-box` class used by the
# call-out boxes rendered inside the tabs.
_THEME_CSS = """
<style>
.main {background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);}
h1 {color: white !important; text-align: center; padding: 20px;}
h2, h3 {color: white !important;}
.stTabs [data-baseweb="tab-list"] {background-color: rgba(255,255,255,0.15); padding: 15px; border-radius: 15px;}
.stTabs [data-baseweb="tab"] {background-color: rgba(255,255,255,0.2); color: white; font-weight: 700; padding: 12px 24px;}
.stTabs [aria-selected="true"] {background-color: rgba(255,255,255,0.5) !important; border: 2px solid white;}
.insight-box {background: linear-gradient(135deg, #f093fb, #f5576c); color: white; padding: 20px; border-radius: 12px; margin: 15px 0;}
[data-testid="stSidebar"] {background: linear-gradient(180deg, #667eea, #764ba2);}
</style>
"""
st.markdown(_THEME_CSS, unsafe_allow_html=True)

@st.cache_data
def load_data():
    """Load ``GlobalWeatherRepository.csv`` and parse its date-like columns.

    Columns whose name contains ``date`` or ``updated`` are always converted
    with ``pd.to_datetime(..., errors='coerce')``. Columns matched only by the
    looser ``time`` keyword are converted only when at least half of their
    non-null values parse: that keyword also hits text columns such as
    ``timezone``, and coercing those would replace every value with NaT.

    Returns:
        ``(df, None)`` on success, or ``(None, message)`` when reading or
        converting the file raised; the caller shows the message and offers
        a manual upload instead.
    """
    try:
        df = pd.read_csv("GlobalWeatherRepository.csv")
        for col in df.columns:
            name = str(col).lower()
            is_date_like = any(keyword in name for keyword in ('date', 'updated'))
            if not (is_date_like or 'time' in name):
                continue
            parsed = pd.to_datetime(df[col], errors='coerce')
            # Keep a 'time'-only match untouched unless it really holds
            # timestamps; an all-null column still converts (0 >= 0).
            if is_date_like or parsed.notna().sum() * 2 >= df[col].notna().sum():
                df[col] = parsed
        return df, None
    except Exception as e:
        # Deliberately broad: any failure is reported to the UI as text.
        return None, str(e)

st.title("üå¶Ô∏è ClimateScope - Global Weather Dashboard")
st.markdown("<div style='text-align:center; color:white; font-size:18px;'><b>Infosys Milestone 2 Project</b></div>", unsafe_allow_html=True)

df, error = load_data()

# Fallback: when the bundled CSV could not be loaded, show the reason and let
# the user upload the file; the script halts until a file is provided.
if df is None:
    st.error(f"‚ö†Ô∏è Error: {error}")
    uploaded = st.file_uploader("üìÅ Upload GlobalWeatherRepository.csv", type=['csv'])
    if not uploaded:
        st.stop()
    df = pd.read_csv(uploaded)
    # Parse date-like columns. Names containing 'date'/'updated' are always
    # converted; a name matched only by the looser 'time' keyword (which also
    # hits text columns such as 'timezone') is converted only when at least
    # half of its non-null values parse, so coercion cannot blank the column.
    for col in df.columns:
        name = str(col).lower()
        is_date_like = any(keyword in name for keyword in ('date', 'updated'))
        if not (is_date_like or 'time' in name):
            continue
        parsed = pd.to_datetime(df[col], errors='coerce')
        if is_date_like or parsed.notna().sum() * 2 >= df[col].notna().sum():
            df[col] = parsed

# Optional debugging aid: preview the first rows in the sidebar.
if st.sidebar.checkbox("üîç Show Raw Data Sample"):
    st.sidebar.dataframe(df.head())

def find_col(df, keys):
    """Return the first column of *df* that matches one of *keys*.

    Keys are tried in order and matching is case-insensitive. For each key an
    exact name match wins over a substring match, so asking for
    ``'last_updated'`` selects that column rather than an earlier
    ``'last_updated_epoch'``. Column labels are compared via ``str()`` so
    non-string labels do not raise.

    Args:
        df: Object exposing a ``columns`` iterable (e.g. a DataFrame).
        keys: Candidate names/fragments, highest priority first.

    Returns:
        The original column label, or ``None`` when nothing matches.
    """
    labels = [(col, str(col).lower()) for col in df.columns]
    for key in keys:
        needle = key.lower()
        # Pass 1: exact (case-insensitive) name match for this key.
        for col, lowered in labels:
            if lowered == needle:
                return col
        # Pass 2: fall back to the first column containing the key.
        for col, lowered in labels:
            if needle in lowered:
                return col
    return None

# Search hints for each logical field, in priority order. The row order here
# must match the unpacking order below. Any col_* ends up None when the
# dataset has no matching column, and the UI sections check for that.
_COLUMN_HINTS = (
    ['last_updated', 'date', 'datetime'],
    ['temperature', 'temp'],
    ['humidity'],
    ['wind_kph', 'wind'],
    ['pressure'],
    ['country'],
    ['location', 'city'],
    ['latitude', 'lat'],
    ['longitude', 'lon'],
    ['condition', 'weather'],
)
(
    col_date,
    col_temp,
    col_hum,
    col_wind,
    col_press,
    col_country,
    col_loc,
    col_lat,
    col_lon,
    col_cond,
) = [find_col(df, hints) for hints in _COLUMN_HINTS]

# Headline facts about the loaded (unfiltered) dataset.
with st.expander("üìä Dataset Info", expanded=True):
    records_slot, countries_slot, columns_slot = st.columns(3)
    records_slot.metric("Records", f"{len(df):,}")
    # Country count is only available when a country column was detected.
    if col_country:
        country_count = df[col_country].nunique()
    else:
        country_count = "N/A"
    countries_slot.metric("Countries", country_count)
    columns_slot.metric("Columns", len(df.columns))
    st.write("**Columns:**", list(df.columns))

st.sidebar.header("üéõÔ∏è Filters")

# Country multiselect, pre-selecting up to the first five countries
# alphabetically (slicing already copes with shorter lists).
selected_countries = []
if col_country:
    all_countries = sorted(df[col_country].dropna().unique())
    default_countries = all_countries[:5]
    selected_countries = st.sidebar.multiselect("üåç Countries", all_countries, default_countries)

# Work on a copy so the original frame stays available for global stats.
# An empty selection means "no country filter", not "no rows".
filtered_df = df.copy()
if col_country and selected_countries:
    country_mask = filtered_df[col_country].isin(selected_countries)
    filtered_df = filtered_df[country_mask]

# Optional date-range filter, offered only when the detected date column
# yields a usable min/max.
if col_date:
    try:
        all_dates = pd.to_datetime(df[col_date])
        min_dt = all_dates.min()
        max_dt = all_dates.max()

        # min/max are NaT when no value in the column parsed as a date.
        if pd.notna(min_dt) and pd.notna(max_dt):
            min_dt = min_dt.date()
            max_dt = max_dt.date()
            dates = st.sidebar.date_input("üìÖ Date Range", [min_dt, max_dt], min_value=min_dt, max_value=max_dt)
            # While the user is mid-selection only one date is returned;
            # filter only once both ends of the range are known.
            if len(dates) == 2:
                row_dates = pd.to_datetime(filtered_df[col_date]).dt.date
                filtered_df = filtered_df[(row_dates >= dates[0]) & (row_dates <= dates[1])]
    except Exception:
        # Not a bare `except:` -- that would also trap BaseException-derived
        # signals (KeyboardInterrupt, Streamlit's stop/rerun control flow).
        st.sidebar.warning("‚ö†Ô∏è Date filtering unavailable")

st.sidebar.success(f"‚úÖ {len(filtered_df):,} records")

# One tab per analysis view; the tab1..tab6 handles are used by the sections
# that follow.
tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs(["üìä Overview", "üìà Trends", "üåç Geographic", "üîó Correlations", "‚ö†Ô∏è Extremes", "üìã Summary"])

with tab1:
    st.header("üìä Overview")

    # Headline averages over the filtered rows; each metric appears only when
    # its column was detected.
    temp_slot, hum_slot, wind_slot, press_slot = st.columns(4)
    if col_temp:
        avg_temp = filtered_df[col_temp].mean()
        temp_slot.metric("üå°Ô∏è Avg Temp", f"{avg_temp:.1f}¬∞C")
    if col_hum:
        avg_hum = filtered_df[col_hum].mean()
        hum_slot.metric("üíß Humidity", f"{avg_hum:.1f}%")
    if col_wind:
        avg_wind = filtered_df[col_wind].mean()
        wind_slot.metric("üå¨Ô∏è Wind", f"{avg_wind:.1f} kph")
    if col_press:
        avg_press = filtered_df[col_press].mean()
        press_slot.metric("üîµ Pressure", f"{avg_press:.0f} mb")

    # Temperature histogram, coloured by country, with a marginal box plot.
    if col_temp:
        fig = px.histogram(
            filtered_df,
            x=col_temp,
            color=col_country,
            nbins=60,
            title="Temperature Distribution",
            marginal="box",
        )
        fig.update_layout(height=450, template='plotly_white')
        st.plotly_chart(fig, use_container_width=True)
        st.markdown('<div class="insight-box">üí° Histogram shows temperature spread across regions</div>', unsafe_allow_html=True)

with tab2:
    st.header("üìà Trends & Seasonality")
    # Both charts need a temperature column and a date column.
    if col_temp and col_date:
        # Temperature over time, one line per country.
        fig = px.line(filtered_df.sort_values(col_date), x=col_date, y=col_temp, color=col_country, title="Temperature Trends")
        fig.update_layout(height=400, template='plotly_white')
        st.plotly_chart(fig, use_container_width=True)

        # Per-month temperature distribution.
        df_month = filtered_df.copy()
        df_month['month'] = df_month[col_date].dt.strftime('%B')
        # Pin the x axis to calendar order; without this the months appear in
        # whatever order they first occur in the data. The names come from the
        # same strftime('%B') call so they match the column's values.
        month_order = [datetime(2000, m, 1).strftime('%B') for m in range(1, 13)]
        fig2 = px.box(
            df_month,
            x='month',
            y=col_temp,
            color=col_country,
            title="Monthly Temperature",
            category_orders={'month': month_order},
        )
        fig2.update_layout(height=400, template='plotly_white')
        st.plotly_chart(fig2, use_container_width=True)
        st.markdown('<div class="insight-box">üìÖ Monthly patterns reveal seasonal variations</div>', unsafe_allow_html=True)

with tab3:
    st.header("üåç Geographic Patterns")
    if col_temp and col_country:
        # Mean temperature per country, drawn as a world choropleth keyed on
        # country names.
        mean_by_country = filtered_df.groupby(col_country)[col_temp].mean()
        country_avg = mean_by_country.reset_index()
        fig = px.choropleth(
            country_avg,
            locations=col_country,
            locationmode='country names',
            color=col_temp,
            color_continuous_scale='RdYlBu_r',
            title="Temperature by Country",
        )
        fig.update_layout(height=500)
        st.plotly_chart(fig, use_container_width=True)

        # Per-record scatter map, only when coordinates were detected.
        if col_lat and col_lon:
            fig2 = px.scatter_geo(
                filtered_df,
                lat=col_lat,
                lon=col_lon,
                color=col_temp,
                hover_name=col_loc,
                color_continuous_scale='Turbo',
                projection='natural earth',
            )
            fig2.update_layout(height=450)
            st.plotly_chart(fig2, use_container_width=True)

with tab4:
    st.header("üîó Correlations")
    # A correlation matrix needs at least two numeric columns.
    numeric_cols = filtered_df.select_dtypes(include=[np.number]).columns.tolist()
    has_pairs = len(numeric_cols) > 1
    if has_pairs:
        numeric_frame = filtered_df[numeric_cols]
        corr = numeric_frame.corr()
        # Colour range fixed to [-1, 1] so shades are comparable across filters.
        fig = px.imshow(
            corr,
            text_auto='.2f',
            color_continuous_scale='RdBu_r',
            zmin=-1,
            zmax=1,
            title="Correlation Matrix",
        )
        fig.update_layout(height=500, template='plotly_white')
        st.plotly_chart(fig, use_container_width=True)
        st.markdown('<div class="insight-box">üîó Red=positive, Blue=negative correlation</div>', unsafe_allow_html=True)

with tab5:
    st.header("‚ö†Ô∏è Extreme Events")
    if col_temp:
        # Columns shown in both ranking tables; fields that were not detected
        # (None) are dropped.
        candidate_cols = (col_country, col_loc, col_temp, col_cond)
        display_cols = [name for name in candidate_cols if name in filtered_df.columns]

        hot_panel, cold_panel = st.columns(2)
        with hot_panel:
            st.subheader("üî• Top 10 Hottest")
            hottest_rows = filtered_df.nlargest(10, col_temp)
            st.dataframe(hottest_rows[display_cols], use_container_width=True)

        with cold_panel:
            st.subheader("‚ùÑÔ∏è Top 10 Coldest")
            coldest_rows = filtered_df.nsmallest(10, col_temp)
            st.dataframe(coldest_rows[display_cols], use_container_width=True)

        # "Extreme" here means a reading above 40 or below 0.
        too_hot = filtered_df[col_temp] > 40
        too_cold = filtered_df[col_temp] < 0
        extremes = filtered_df[too_hot | too_cold]
        if len(extremes) > 0:
            st.info(f"‚ö†Ô∏è Found {len(extremes)} extreme events")
            fig = px.histogram(
                extremes,
                x=col_temp,
                color=col_country,
                nbins=30,
                title="Extreme Temperatures",
            )
            fig.update_layout(height=350, template='plotly_white')
            st.plotly_chart(fig, use_container_width=True)

with tab6:
    st.header("üìã Summary")
    st.dataframe(filtered_df.describe(), use_container_width=True)

    # Build the text insights. Each one is skipped when the filters leave no
    # usable temperature data, instead of printing "nan" or raising from
    # idxmax() on an empty/all-NaN series.
    insights = []
    if col_temp:
        temps = filtered_df[col_temp]
        if temps.notna().any():
            insights.append(f"üå°Ô∏è Temp: {temps.min():.1f}¬∞C to {temps.max():.1f}¬∞C")
    if col_country and col_temp:
        country_means = filtered_df.groupby(col_country)[col_temp].mean().dropna()
        if not country_means.empty:
            hottest = country_means.idxmax()
            insights.append(f"üî• Hottest: {hottest}")

    for ins in insights:
        st.markdown(f'<div class="insight-box">{ins}</div>', unsafe_allow_html=True)

    # Export the currently filtered rows; the file name carries today's date.
    csv = filtered_df.to_csv(index=False).encode('utf-8')
    st.download_button("üì• Download CSV", csv, f"weather_{datetime.now().strftime('%Y%m%d')}.csv", "text/csv")

# Page footer, rendered below the tabs.
st.markdown("<div style='text-align:center; color:white; padding:20px;'>üå¶Ô∏è ClimateScope Dashboard | Infosys Internship Project</div>", unsafe_allow_html=True)

Overwriting app.py
