In [None]:
!pip install -q streamlit prophet tensorflow scikit-learn pyngrok plotly pandas numpy faker
!npm install -q localtunnel

[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K
up to date, audited 23 packages in 1s
[1G[0K⠸[1G[0K
[1G[0K⠸[1G[0K3 packages are looking for funding
[1G[0K⠸[1G[0K  run `npm fund` for details
[1G[0K⠸[1G[0K
2 [31m[1mhigh[22m[39m severity vulnerabilities

To address all issues (including breaking changes), run:
  npm audit fix --force

Run `npm audit` for details.
[1G[0K⠼[1G[0K

In [None]:
import pandas as pd
import numpy as np
import uuid
from faker import Faker
from pandas.tseries.offsets import DateOffset

# Seed both random sources used by the generator. Seeding only NumPy leaves
# the Faker-generated columns (SKU, supplier country) different on every run.
fake = Faker()
Faker.seed(42)
np.random.seed(42)

# Emission per kilometre travelled, keyed by transport mode.
TRANSPORT_EMISSION_FACTORS = {
    'Flight': 0.85,    # kg CO2e/km
    'Maritime': 0.03,  # kg CO2e/km
    'Road': 0.21,      # kg CO2e/km
    'Train': 0.06      # kg CO2e/km
}

# Multiplier applied to the transport emission, keyed by packaging material.
PACKAGING_IMPACT = {
    'plastic': 1.2,
    'paper': 0.9
}

def generate_dataset(num_entries=50000):
    """Generate synthetic e-commerce carbon footprint data.

    Each row is one simulated order. Orders are spaced one hour apart,
    starting at 2023-01-01 00:00:00. The emission of an order is
    ``distance * transport factor * packaging multiplier`` with +/-5 %
    uniform noise.

    Args:
        num_entries: Number of rows to generate.

    Returns:
        pandas.DataFrame with the columns transaction_id, product_sku,
        distance_km, packaging_material, supplier_location,
        carbon_emission_kg, order_value_usd, transport_mode and order_date.
        With ``num_entries=0`` the frame is empty.
    """
    transport_modes = ['Road', 'Flight', 'Maritime', 'Train']
    transport_probs = [0.6, 0.1, 0.2, 0.1]
    # (mean, sigma) of the log-normal distance distribution for each mode
    distance_params = {
        'Flight': (4.5, 0.3),
        'Maritime': (6.0, 0.4),
        'Road': (3.0, 0.2),
        'Train': (5.0, 0.3)
    }
    start_date = pd.to_datetime('2023-01-01 00:00:00')

    data = []
    for i in range(num_entries):
        transport_mode = np.random.choice(transport_modes, p=transport_probs)

        # Sample only the chosen mode's distribution instead of drawing a
        # value for all four modes and throwing three of them away.
        mu, sigma = distance_params[transport_mode]
        distance = np.clip(np.random.lognormal(mu, sigma), 10, 10000)
        packaging = np.random.choice(['plastic', 'paper'], p=[0.65, 0.35])

        base_emission = distance * TRANSPORT_EMISSION_FACTORS[transport_mode]
        emission = base_emission * PACKAGING_IMPACT[packaging] * np.random.uniform(0.95, 1.05)

        data.append({
            'transaction_id': str(uuid.uuid4()),
            'product_sku': fake.bothify(text='??-#####', letters='ABCDE'),
            'distance_km': round(distance, 2),
            'packaging_material': packaging,
            'supplier_location': fake.country(),
            'carbon_emission_kg': round(emission, 2),
            # Order values are floored at 10 USD
            'order_value_usd': round(max(10, np.random.lognormal(3.5, 0.5)), 2),
            'transport_mode': transport_mode,
            'order_date': start_date + pd.Timedelta(hours=i)
        })

    return pd.DataFrame(data)

# Build the synthetic dataset and write it to disk as CSV (without the index)
df = generate_dataset(num_entries=50000)
df.to_csv(path_or_buf='ecommerce_carbon_dataset.csv', index=False)
print("✅ Dataset generated successfully")

✅ Dataset generated successfully


In [None]:
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from prophet import Prophet
from prophet.serialize import model_to_json, model_from_json
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.models import Sequential, load_model
import joblib

class CarbonAnalytics:
    """Train and persist the clustering and forecasting models.

    The dataset is read once in the constructor. Each ``train_*`` method fits
    one model on it and writes the resulting artefact to the current working
    directory.
    """

    # Columns used to fit the scaler and the K-Means clusters
    CLUSTER_FEATURES = ['distance_km', 'carbon_emission_kg', 'order_value_usd']
    # Number of consecutive observations fed to the LSTM per training sample
    LSTM_WINDOW = 24

    def __init__(self, data_path='ecommerce_carbon_dataset.csv'):
        """Load the dataset.

        Args:
            data_path: CSV file to read; ``order_date`` is parsed as datetime.
        """
        self.df = pd.read_csv(data_path, parse_dates=['order_date'])
        self.scaler = StandardScaler()

    def train_clusters(self, random_state=42):
        """Train product clusters for recommendations.

        Fits the scaler and a 5-cluster K-Means on the scaled feature
        columns, then saves both as a tuple to ``clustering_model.pkl``.

        Args:
            random_state: Seed for K-Means so that cluster assignments do not
                change between runs on the same data.
        """
        features = self.df[self.CLUSTER_FEATURES]
        self.scaler.fit(features)
        self.kmeans = KMeans(n_clusters=5, n_init=10, random_state=random_state)
        self.kmeans.fit(self.scaler.transform(features))
        joblib.dump((self.scaler, self.kmeans), 'clustering_model.pkl')

    def train_prophet(self):
        """Train and save Prophet model using proper serialization.

        Fits Prophet on (order_date, carbon_emission_kg) with yearly and
        weekly seasonality and writes the model to ``prophet_model.json``.
        """
        prophet_df = self.df.rename(columns={
            'order_date': 'ds',
            'carbon_emission_kg': 'y'
        })[['ds', 'y']]

        model = Prophet(
            yearly_seasonality=True,
            weekly_seasonality=True,
            daily_seasonality=False
        )
        model.fit(prophet_df)

        # Save using Prophet's native serialization
        with open('prophet_model.json', 'w') as f:
            f.write(model_to_json(model))

    @staticmethod
    def _make_sequences(values, window):
        """Slice a series into overlapping windows and their next-step targets.

        Args:
            values: Array-like of shape (n, features), in sequence order.
            window: Number of consecutive rows per input sample.

        Returns:
            Tuple ``(X, y)`` where ``X`` has shape (n - window, window,
            features) and ``y`` has shape (n - window, features); ``y[k]`` is
            the row that follows window ``X[k]``.

        Raises:
            ValueError: If there are not more than ``window`` rows.
        """
        values = np.asarray(values)
        n_samples = len(values) - window
        if n_samples <= 0:
            raise ValueError(
                f"Need more than {window} observations to build sequences, got {len(values)}"
            )
        # Row k of the index matrix is [k, k+1, ..., k+window-1]
        idx = np.arange(n_samples)[:, None] + np.arange(window)[None, :]
        return values[idx], values[window:]

    def train_lstm(self):
        """Train LSTM forecasting model.

        Scales ``carbon_emission_kg`` to [0, 1], trains a two-layer LSTM to
        predict the next value from the previous ``LSTM_WINDOW`` values (rows
        are used in the order they appear in the dataset), and saves the
        model to ``lstm_model.h5`` and the scaler to ``lstm_scaler.pkl``.
        """
        # Imported here so the cell-level import list stays untouched
        from tensorflow.keras.layers import Input

        scaler = MinMaxScaler()
        scaled_data = scaler.fit_transform(self.df[['carbon_emission_kg']])

        # Create sequences
        X, y = self._make_sequences(scaled_data, self.LSTM_WINDOW)

        # An explicit Input layer replaces the deprecated `input_shape=`
        # argument on the first LSTM layer.
        model = Sequential([
            Input(shape=(self.LSTM_WINDOW, 1)),
            LSTM(64, return_sequences=True),
            Dropout(0.2),
            LSTM(32),
            Dense(1)
        ])
        model.compile(optimizer='adam', loss='mse')
        model.fit(X, y, epochs=10, batch_size=32, verbose=0)
        model.save('lstm_model.h5')
        joblib.dump(scaler, 'lstm_scaler.pkl')

# Load the dataset, then fit and persist each model in turn
analytics = CarbonAnalytics()
for train_step in (analytics.train_clusters, analytics.train_prophet, analytics.train_lstm):
    train_step()
print("✅ Models trained successfully")

DEBUG:cmdstanpy:input tempfile: /tmp/tmp8f83fqv7/3hy6d36u.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmp8f83fqv7/i1_7z15z.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=29931', 'data', 'file=/tmp/tmp8f83fqv7/3hy6d36u.json', 'init=/tmp/tmp8f83fqv7/i1_7z15z.json', 'output', 'file=/tmp/tmp8f83fqv7/prophet_modelx62ft_u3/prophet_model-20250615201748.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
20:17:48 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
20:17:58 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
  super().__init__(**kwargs)


✅ Models trained successfully


In [None]:
%%writefile app.py
import streamlit as st
import pandas as pd
import plotly.express as px
from prophet.plot import plot_plotly
import joblib
from prophet.serialize import model_from_json

# Page-level settings (browser tab title, wide layout) and the visible heading
st.set_page_config(layout="wide", page_title="Carbon Analytics")
st.title("🌱 E-commerce Carbon Intelligence")

# Load assets
@st.cache_data
def load_data():
    """Read the generated dataset from CSV, parsing ``order_date`` as datetimes."""
    dataset = pd.read_csv('ecommerce_carbon_dataset.csv', parse_dates=['order_date'])
    return dataset

@st.cache_resource
def load_models():
    """Load the persisted model artefacts from the working directory.

    Returns:
        dict with the keys ``scaler`` and ``kmeans`` (from
        ``clustering_model.pkl``), ``prophet`` (from ``prophet_model.json``)
        and ``lstm_scaler`` (from ``lstm_scaler.pkl``).
    """
    # The clustering artefact is a (scaler, kmeans) tuple
    feature_scaler, cluster_model = joblib.load('clustering_model.pkl')

    # Prophet is stored in its own JSON serialization format
    with open('prophet_model.json', 'r') as model_file:
        serialized_prophet = model_file.read()
    forecaster = model_from_json(serialized_prophet)

    # Scaler that was fitted alongside the LSTM
    emission_scaler = joblib.load('lstm_scaler.pkl')

    return dict(
        scaler=feature_scaler,
        kmeans=cluster_model,
        prophet=forecaster,
        lstm_scaler=emission_scaler,
    )

df = load_data()
models = load_models()

# Sidebar controls
with st.sidebar:
    st.header("Filters")
    raw_date_range = st.date_input("Date Range",
        [df['order_date'].min(), df['order_date'].max()])

    # Preselect every mode so the dashboard is populated on first load
    available_modes = list(df['transport_mode'].unique())
    transport_modes = st.multiselect("Transport Modes",
        available_modes, default=available_modes)

# While the user is still picking the end date, st.date_input returns fewer
# than two dates; normalise to a list and fall back to the full data range.
if isinstance(raw_date_range, (list, tuple)):
    picked_dates = list(raw_date_range)
else:
    picked_dates = [raw_date_range]
if not picked_dates:
    picked_dates = [df['order_date'].min(), df['order_date'].max()]

# Convert to pandas timestamps. The upper bound is the midnight *after* the
# last selected day and is treated as exclusive, so orders placed during the
# final day are kept.
range_start = pd.Timestamp(picked_dates[0]).normalize()
range_end = pd.Timestamp(picked_dates[-1]).normalize() + pd.Timedelta(days=1)
date_range = [range_start, range_end]

# Filter data
filtered_df = df[
    (df['order_date'] >= range_start) &
    (df['order_date'] < range_end) &
    (df['transport_mode'].isin(transport_modes))
]

# Dashboard sections
tab1, tab2, tab3 = st.tabs(["Analytics", "Forecasting", "Recommendations"])

# Tab 1: descriptive charts over the filtered transactions
with tab1:
    col1, col2 = st.columns(2)
    with col1:
        st.subheader("Emission Distribution")
        fig = px.histogram(filtered_df, x='carbon_emission_kg', nbins=50)
        st.plotly_chart(fig, use_container_width=True)

    with col2:
        st.subheader("Transport Impact")
        # Mean emission per transport mode
        transport_stats = filtered_df.groupby('transport_mode')['carbon_emission_kg'].mean()
        st.bar_chart(transport_stats)

# Tab 2: forecasts
with tab2:
    model_type = st.radio("Select Model", ["Prophet", "LSTM"], horizontal=True)

    if model_type == "Prophet":
        # Extend the training history by 365 periods and plot the forecast
        future = models['prophet'].make_future_dataframe(periods=365)
        forecast = models['prophet'].predict(future)
        fig = plot_plotly(models['prophet'], forecast)
        st.plotly_chart(fig, use_container_width=True)
    elif filtered_df.empty:
        # The scaler raises on a frame with zero rows, so stop before calling it
        st.warning("No transactions match current filters")
    else:
        scaled_data = models['lstm_scaler'].transform(filtered_df[['carbon_emission_kg']])
        # Add LSTM prediction logic here
        st.info("LSTM forecasting is not available yet")

# Tab 3: recommendation for one randomly sampled transaction
with tab3:
    st.subheader("Optimization Suggestions")
    if not filtered_df.empty:
        sample_row = filtered_df.sample(1)
        sample_tx = sample_row.iloc[0].to_dict()
        # Hand the scaler a DataFrame with the same column names it was
        # fitted on; a bare list triggers scikit-learn's feature-name warning.
        features = models['scaler'].transform(
            sample_row[['distance_km', 'carbon_emission_kg', 'order_value_usd']]
        )
        # Cast the NumPy integer to a plain int for the dictionary lookup
        cluster = int(models['kmeans'].predict(features)[0])
        recommendations = {
            0: "✅ Efficient transaction - maintain current practices",
            1: f"🚚 Switch to rail (Current: {sample_tx['transport_mode']})",
            2: f"📦 Use paper packaging (Current: {sample_tx['packaging_material']})",
            3: f"🌿 Carbon offset for ${sample_tx['order_value_usd']} order",
            4: f"📈 Optimize packaging for {sample_tx['distance_km']}km shipment"
        }
        st.success(recommendations.get(cluster, "No recommendation available"))
    else:
        st.warning("No transactions match current filters")

Overwriting app.py


In [None]:
from pyngrok import ngrok

# Set ngrok authtoken (replace with your token)
ngrok.set_auth_token("2yYh6CEZctF7pKa67YPQ26URNPo_7Qd9a9U7PxNY3dAXX6E18")

# Start Streamlit
!streamlit run app.py --server.port 8501 &>/dev/null &

# Create tunnel
# Pass the port number directly as the first argument
public_url = ngrok.connect(8501) # Changed from port=8502 to 8501 based on streamlit server port
print(f"🌍 Dashboard URL: {public_url}")

🌍 Dashboard URL: NgrokTunnel: "https://909b-34-143-128-250.ngrok-free.app" -> "http://localhost:8501"
