**APP.py**

In [28]:
# app.py
%%writefile app.py

import datetime
import hashlib
import io
import random
import secrets

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.asymmetric import rsa, padding
from sklearn.ensemble import IsolationForest
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dense, Dropout, LeakyReLU
from tensorflow.keras.models import Sequential

# Authentication record for this session; created only when the session does
# not already have one.
if 'auth' not in st.session_state:
    st.session_state.auth = dict(user=None, role=None, otp=None, logged_in=False)

# RSA key pair for this session, used by encrypt_input / decrypt_input.
# The public key is derived from the freshly generated private key.
if 'rsa_keys' not in st.session_state:
    st.session_state.rsa_keys = {
        'private_key': rsa.generate_private_key(public_exponent=65537, key_size=2048),
    }
    st.session_state.rsa_keys['public_key'] = (
        st.session_state.rsa_keys['private_key'].public_key()
    )



# Security functions: password hashing, user storage, RSA helpers,
# input screening and audit logging.
def hash_password(password: str) -> str:
    """Return the hex-encoded SHA-256 digest of ``password``.

    The password is encoded with the default ``str.encode()`` encoding
    before hashing; no salt is applied.
    """
    digest = hashlib.sha256()
    digest.update(password.encode())
    return digest.hexdigest()

def load_users():
    """Load the user table from ``users.csv`` as a dict keyed by username.

    When the file does not exist, a default table with the ``admin`` and
    ``user1`` accounts is written to ``users.csv`` and returned instead.

    Returns:
        ``{username: {'password': <hash>, 'role': <role>}}``.
    """
    users_file = 'users.csv'
    try:
        table = pd.read_csv(users_file)
    except FileNotFoundError:
        # First run: seed the store with the built-in accounts.
        table = pd.DataFrame({
            'username': ['admin', 'user1'],
            'password': [hash_password('admin123'), hash_password('userpass')],
            'role': ['admin', 'user']
        })
        table.to_csv(users_file, index=False)
    return table.set_index('username').to_dict(orient='index')
def encrypt_input(text: str) -> bytes:
    """Encrypt ``text`` with the session's RSA public key.

    The text is UTF-8 encoded and encrypted with OAEP padding (MGF1 and
    SHA-256, no label). Returns the ciphertext bytes.
    """
    public_key = st.session_state.rsa_keys['public_key']
    plaintext = text.encode('utf-8')
    oaep = padding.OAEP(
        mgf=padding.MGF1(hashes.SHA256()),
        algorithm=hashes.SHA256(),
        label=None,
    )
    return public_key.encrypt(plaintext, oaep)

def decrypt_input(blob: bytes) -> str:
    """Decrypt ``blob`` with the session's RSA private key.

    Uses the same OAEP parameters as ``encrypt_input`` (MGF1 and SHA-256,
    no label) and decodes the recovered bytes as UTF-8.
    """
    private_key = st.session_state.rsa_keys['private_key']
    oaep = padding.OAEP(
        mgf=padding.MGF1(hashes.SHA256()),
        algorithm=hashes.SHA256(),
        label=None,
    )
    plaintext = private_key.decrypt(blob, oaep)
    return plaintext.decode('utf-8')

def is_adversarial(vals):
    """Return True if any reading exceeds its per-pollutant ceiling.

    ``vals`` is compared position by position against the ceilings for
    PM2.5, NO2 and Ozone; positions beyond the third are ignored.
    """
    ceilings = (500, 500, 500)  # PM2.5, NO2, Ozone
    for reading, ceiling in zip(vals, ceilings):
        if reading > ceiling:
            return True
    return False

def log_action(user, action, status, reason=""):
    """Append a timestamped audit entry to the session's ``security_log`` list.

    The list is created in ``st.session_state`` on first use. The timestamp
    is the local time formatted as ``YYYY-MM-DD HH:MM:SS``.
    """
    stamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    entry = f"[{stamp}] USER={user} | ACTION={action} | STATUS={status} | {reason}"
    history = st.session_state.setdefault('security_log', [])
    history.append(entry)
# Add after existing security functions but before authentication page

### ===== DATA POISONING PROTECTION =====
def validate_training_data(data: pd.DataFrame) -> bool:
    """Check that a dataset is acceptable for Pakistan-specific training.

    Three checks run in order; the first one that fails is reported with
    ``st.error`` and stops validation:

    1. Every required column is present.
    2. At least 10 rows have ``Country == 'Pakistan'``.
    3. Each pollutant column of those rows has at least one non-null value,
       and all of its non-null values lie inside the accepted range.

    Args:
        data: Dataset to validate.

    Returns:
        True when all checks pass. False when a check fails, or when an
        unexpected exception occurs (it is recorded via ``log_action``
        rather than propagated).
    """
    required_cols = [
        'Country',
        'PM2.5 AQI Value',
        'NO2 AQI Value',
        'Ozone AQI Value',
        'AQI Value'
    ]
    # Accepted (min, max) per pollutant column.
    valid_ranges = {
        'PM2.5 AQI Value': (0, 500),
        'NO2 AQI Value': (0, 200),
        'Ozone AQI Value': (0, 500)
    }

    try:
        # 1. Check required columns
        if not all(col in data.columns for col in required_cols):
            st.error("Missing required columns in dataset")
            return False

        # 2. Filter Pakistan data
        pak_data = data[data['Country'] == 'Pakistan']
        if len(pak_data) < 10:  # Minimum 10 samples
            st.error(f"Insufficient Pakistan data ({len(pak_data)} samples)")
            return False

        # 3. Check feature ranges on the non-null values of each column
        for col, (min_val, max_val) in valid_ranges.items():
            col_data = pak_data[col].dropna()
            if col_data.empty:
                st.error(f"Empty column: {col}")
                return False
            lowest, highest = col_data.min(), col_data.max()
            if lowest < min_val or highest > max_val:
                st.error(f"Invalid {col} range ({lowest}-{highest})")
                return False

        return True

    except Exception as e:
        # Any unexpected failure (e.g. non-numeric column contents) counts
        # as a failed validation.
        log_action("system", "Data Validation", "Failed", str(e))
        return False

### ===== MODEL POISONING PROTECTION =====
def secure_model_save(model, filename: str) -> bool:
    """Save ``model`` with joblib and record its SHA-256 in a sidecar file.

    The digest of the written file is stored as hex text in
    ``<filename>.hash`` so that ``secure_model_load`` can detect changes.

    Args:
        model: Object to serialise with ``joblib.dump``.
        filename: Destination path of the model file.

    Returns:
        True on success. False if any step raises; the error is recorded
        via ``log_action`` instead of propagating.
    """
    try:
        joblib.dump(model, filename)
        # Hash the bytes actually written to disk; `with` closes the handle
        # even if reading fails.
        with open(filename, 'rb') as model_file:
            model_hash = hashlib.sha256(model_file.read()).hexdigest()
        with open(f"{filename}.hash", 'w') as hash_file:
            hash_file.write(model_hash)
        log_action(st.session_state.auth['user'], "Model Save", "Success", filename)
        return True
    except Exception as e:
        log_action("system", "Model Save", "Failed", str(e))
        return False

def secure_model_load(filename: str):
    """Load a joblib model only if it matches its recorded SHA-256.

    The model file is read once; the digest of those bytes is compared with
    the hex digest stored in ``<filename>.hash``, and the model is then
    deserialised from the same bytes, so the verified content is exactly
    the content that gets loaded.

    Args:
        filename: Path of the model file written by ``secure_model_save``.

    Returns:
        The deserialised object, or None when the hash does not match or
        any step raises (both cases are recorded via ``log_action``).
    """
    try:
        with open(filename, 'rb') as model_file:
            payload = model_file.read()
        with open(f"{filename}.hash", 'r') as hash_file:
            # Tolerate surrounding whitespace such as a trailing newline.
            original_hash = hash_file.read().strip()

        current_hash = hashlib.sha256(payload).hexdigest()
        if current_hash != original_hash:
            log_action("system", "Model Load", "Blocked", "Hash mismatch")
            return None

        # SECURITY NOTE: joblib.load unpickles its input, which can execute
        # arbitrary code. The hash check only protects the model if the
        # .hash file itself cannot be modified by an attacker.
        return joblib.load(io.BytesIO(payload))
    except Exception as e:
        log_action("system", "Model Load", "Failed", str(e))
        return None

# Authentication page
def login_page():
    """Render the login / sign-up screen.

    Login is two-step: a username/password check against ``load_users()``
    followed by a one-time code. The code is only simulated: it is shown on
    screen rather than sent anywhere. On success ``auth['logged_in']`` is
    set and the script is rerun.

    Sign-up appends a new account with role ``user`` to ``users.csv``.
    All attempts are recorded via ``log_action``.
    """
    st.title("Zero Trust AI System")
    auth_mode = st.radio("Select Action", ["Login", "Sign Up"])

    if auth_mode == "Login":
        if not st.session_state.auth['logged_in']:
            username = st.text_input("Username")
            password = st.text_input("Password", type="password")

            if st.button("Login"):
                users = load_users()
                if username in users and hash_password(password) == users[username]["password"]:
                    # Six-digit code (100000-999999) drawn from the OS CSPRNG;
                    # `random` is predictable and unsuitable for auth codes.
                    st.session_state.auth['otp'] = str(secrets.randbelow(900000) + 100000)
                    st.session_state.auth['user'] = username
                    st.session_state.auth['role'] = users[username]["role"]
                    st.success(f"OTP sent (simulated): {st.session_state.auth['otp']}")
                    log_action(username, "Login Attempt", "OTP Generated")
                else:
                    st.error("Invalid credentials")
                    log_action(username, "Login Attempt", "Failed", "Bad credentials")

        if st.session_state.auth.get('otp'):
            otp = st.text_input("Enter OTP")
            if st.button("Verify OTP"):
                if otp == st.session_state.auth['otp']:
                    # Invalidate the code so it cannot be used a second time.
                    st.session_state.auth['otp'] = None
                    st.session_state.auth['logged_in'] = True
                    log_action(st.session_state.auth['user'], "Login", "Success")
                    st.rerun()
                else:
                    st.error("Invalid OTP")
                    log_action(st.session_state.auth['user'], "Login", "Failed", "Bad OTP")

    elif auth_mode == "Sign Up":
        st.subheader("Create New Account")
        new_user = st.text_input("New Username")
        new_pass = st.text_input("New Password", type="password")
        confirm_pass = st.text_input("Confirm Password", type="password")

        if st.button("Create Account"):
            # Refuse blank credentials; an empty username would otherwise be
            # written to users.csv as a missing value.
            if not new_user.strip() or not new_pass:
                st.error("Username and password are required")
                return

            if new_pass != confirm_pass:
                st.error("Passwords don't match!")
                return

            users = load_users()
            if new_user in users:
                st.error("Username already exists")
                log_action(new_user, "Signup Attempt", "Failed", "Username taken")
            else:
                new_user_df = pd.DataFrame([{
                    'username': new_user,
                    'password': hash_password(new_pass),
                    'role': 'user'
                }])

                try:
                    existing = pd.read_csv('users.csv')
                    updated = pd.concat([existing, new_user_df])
                except FileNotFoundError:
                    updated = new_user_df

                updated.to_csv('users.csv', index=False)
                st.success("Account created! Please login.")
                log_action(new_user, "Signup", "Success")
# Main application
# Dataset and column names shared by the EDA, training and prediction pages.
_DATASET_PATH = 'global air pollution dataset.csv'
_FEATURE_COLS = ['PM2.5 AQI Value', 'NO2 AQI Value', 'Ozone AQI Value']
_TARGET_COL = 'AQI Value'
# Standard deviation of the Gaussian noise added to each feature before training.
_NOISE_SCALE = {
    'PM2.5 AQI Value': 5,  # 1% of 500
    'NO2 AQI Value': 2,    # 1% of 200
    'Ozone AQI Value': 5,  # 1% of 500
}


def _render_eda():
    """Show correlation, distribution and pair plots for the Pakistan rows."""
    st.header("Exploratory Data Analysis")
    try:
        df = pd.read_csv(_DATASET_PATH)
        pak = df[df['Country'] == 'Pakistan'][[_TARGET_COL] + _FEATURE_COLS].dropna()

        st.subheader("Correlation Heatmap")
        fig, ax = plt.subplots(figsize=(6, 5))
        sns.heatmap(pak.corr(), annot=True, cmap='coolwarm', fmt=".2f", ax=ax)
        st.pyplot(fig)
        plt.close(fig)  # pyplot keeps figures alive until they are closed

        st.subheader("AQI Distribution")
        fig, ax = plt.subplots(figsize=(6, 4))
        sns.histplot(pak[_TARGET_COL], kde=True, bins=30, color='blue', ax=ax)
        st.pyplot(fig)
        plt.close(fig)

        st.subheader("Pairplot of Indicators")
        grid = sns.pairplot(pak, diag_kind='kde')
        st.pyplot(grid)
    except Exception as e:
        st.error(f"Error loading data: {str(e)}")


def _render_training():
    """Validate the dataset, then train, evaluate and persist the models."""
    st.header("Secure Model Training")
    if not st.button("Run Full Training"):
        return

    log_action(st.session_state.auth['user'], "Train Start", "OK")
    try:
        # Load and validate data
        data = pd.read_csv(_DATASET_PATH)
        if not validate_training_data(data):
            return

        # Drop incomplete rows from features and target together; dropping
        # them separately can leave X and y with different rows.
        pak_data = data[data['Country'] == 'Pakistan']
        clean = pak_data[_FEATURE_COLS + [_TARGET_COL]].dropna()
        if len(clean) < 10:
            st.error("Insufficient data after cleaning")
            return

        features = clean[_FEATURE_COLS].astype(float)
        y = clean[_TARGET_COL]

        # Per-feature noise (about 1% of each feature's range) followed by
        # a small uniform-scale Gaussian perturbation.
        for col in _FEATURE_COLS:
            features[col] = features[col] + np.random.normal(0, _NOISE_SCALE[col], len(features))
        X = features + np.random.normal(0, 0.01, features.shape)

        scaler = StandardScaler()
        Xs = scaler.fit_transform(X)
        # The prediction page must apply this exact scaling, so persist the
        # scaler before anything else can fail.
        if not secure_model_save(scaler, 'secure_scaler.pkl'):
            st.error("Failed to save feature scaler")
            return
        Xtr, Xte, ytr, yte = train_test_split(Xs, y, test_size=0.2, random_state=42)

        models = {
            "Linear Regression": LinearRegression(),
            "Random Forest": RandomForestRegressor(
                n_estimators=100,
                max_depth=10,
                random_state=42,
                ccp_alpha=0.01
            ),
            "Gradient Boost": GradientBoostingRegressor(
                n_estimators=100,
                random_state=42,
                ccp_alpha=0.01
            )
        }

        best_score = -np.inf
        best_model = None

        for name, model in models.items():
            # Quality gate: skip models whose mean 5-fold R² is below 0.7.
            scores = cross_val_score(model, Xs, y, cv=5, scoring='r2')
            if np.mean(scores) < 0.7:
                st.error(f"{name} failed quality check (R² < 0.7)")
                continue

            model.fit(Xtr, ytr)
            score = r2_score(yte, model.predict(Xte))
            if score > best_score:
                best_score = score
                best_model = model

            if secure_model_save(model, f'secure_{name.replace(" ", "_")}.pkl'):
                st.success(f"✅ {name} securely saved")

        # Neural network; a failure here must not abort the rest of the run.
        try:
            nn = Sequential([
                Dense(128, input_dim=Xtr.shape[1], kernel_regularizer='l2'),
                LeakyReLU(.01),
                Dropout(.4),
                Dense(64, kernel_regularizer='l2'),
                LeakyReLU(.01),
                Dropout(.4),
                Dense(1)
            ])
            nn.compile(optimizer='adam', loss='mean_squared_error')
            nn.fit(Xtr, ytr, epochs=200, batch_size=8, verbose=0, validation_split=0.2)

            nn.save('secure_neural_net.h5')
            st.success("✅ Neural Network securely saved")

        except Exception as e:
            st.error(f"Neural Network training failed: {str(e)}")

        # `is not None` rather than truthiness: estimators may define __len__.
        if best_model is not None and secure_model_save(best_model, 'secure_best_model.pkl'):
            st.success(f"✅ Best model ({type(best_model).__name__}) securely saved")
            log_action(st.session_state.auth['user'], "Train End", "OK")

    except Exception as e:
        st.error(f"Training failed: {str(e)}")
        log_action(st.session_state.auth['user'], "Train", "Failed", str(e))


def _render_prediction():
    """Collect pollutant readings and predict AQI with the saved best model."""
    st.header("Secure AQI Prediction")
    pm25 = st.number_input("PM2.5 AQI", min_value=0, max_value=500)
    no2 = st.number_input("NO2 AQI", min_value=0, max_value=200)
    ozone = st.number_input("Ozone AQI", min_value=0, max_value=300)

    if not st.button("Predict"):
        return

    try:
        # Both artefacts must pass the integrity check. The model was fitted
        # on standardized features, so the scaler is required as well.
        model = secure_model_load('secure_best_model.pkl')
        scaler = secure_model_load('secure_scaler.pkl')
        if model is None or scaler is None:
            st.error("Model loading failed security checks")
            return

        if is_adversarial([pm25, no2, ozone]):
            st.warning("⚠️ Blocked: Suspicious input detected!")
            log_action(st.session_state.auth['user'], "Predict", "Blocked", "Adversarial input")
            return

        # Apply the training-time scaling before predicting.
        raw_inputs = pd.DataFrame([[pm25, no2, ozone]], columns=_FEATURE_COLS)
        pred = model.predict(scaler.transform(raw_inputs))[0]

        # Validate output sanity
        if pred < 0 or pred > 500:
            st.error("Invalid prediction value")
            log_action(st.session_state.auth['user'], "Predict", "Blocked", "Invalid output")
            return

        st.success(f"Predicted AQI: {pred:.2f}")
        log_action(st.session_state.auth['user'], "Predict", "Success")

    except Exception as e:
        st.error(f"Prediction failed: {str(e)}")
        log_action(st.session_state.auth['user'], "Predict", "Failed", str(e))


def _render_security_tests():
    """Offer on-demand checks of model integrity and data validation."""
    st.header("Security Tests")
    if st.button("Test Model Integrity"):
        # Check the file the training page actually writes.
        model = secure_model_load('secure_best_model.pkl')
        if model is not None:
            st.success("Model integrity verified ✅")
        else:
            st.error("Model compromised ❌")

    if st.button("Test Data Validation"):
        test_data = pd.DataFrame({
            'PM2.5 AQI Value': [600, -10],  # Invalid values
            'NO2 AQI Value': [50, 50],
            'Ozone AQI Value': [50, 50]
        })
        if validate_training_data(test_data):
            st.success("Data validation passed ✅")
        else:
            st.error("Data validation blocked ❌")


def main_app():
    """Render the authenticated application.

    The sidebar shows the current user, a logout button and the page menu.
    Admins get every page plus the five most recent security-log entries;
    other roles only get the prediction page.
    """
    st.sidebar.title(f"Logged in as {st.session_state.auth['user']} ({st.session_state.auth['role']})")
    if st.sidebar.button("Logout"):
        log_action(st.session_state.auth['user'], "Logout", "Success")
        st.session_state.auth = {
            'user': None,
            'role': None,
            'otp': None,
            'logged_in': False
        }
        st.rerun()

    if st.session_state.auth['role'] == "admin":
        menu = ["EDA", "Train Models", "Predict AQI", "Security Test"]
        st.sidebar.header("Security Logs")
        for log in reversed(st.session_state.get('security_log', [])[-5:]):
            st.sidebar.code(log)
    else:
        menu = ["Predict AQI"]

    choice = st.sidebar.selectbox("Menu", menu)

    pages = {
        "EDA": _render_eda,
        "Train Models": _render_training,
        "Predict AQI": _render_prediction,
        "Security Test": _render_security_tests,
    }
    pages[choice]()

# Entry point: authenticated sessions get the main app, everyone else the
# login screen.
if st.session_state.auth['logged_in']:
    main_app()
else:
    login_page()




Overwriting app.py


**Additional commands to run App**

In [None]:
!pip install streamlit ngrok --quiet
!pip install pyngrok --quiet
!ngrok config add-authtoken 2uokMB2UIxPvuHlutPwKmm8K9lq_7aHe6RyJa78Gfj2sLKzRz
!pip install streamlit

**Streamlit App with Ngrok**

In [6]:

from pyngrok import ngrok
import time
# Start the Streamlit server on port 8501 in the background, discarding its output.
!streamlit run app.py --server.port 8501 &> /dev/null &

# Pause before opening the tunnel — presumably to give the server time to start.
time.sleep(5)
# Presumably stops any ngrok process left over from an earlier run — confirm against pyngrok docs.
ngrok.kill()
# Open an HTTP tunnel to the local port the server was started on.
public_url = ngrok.connect(addr=8501, proto="http")
print("🚀 Open this URL to view your Streamlit app:\n", public_url)

🚀 Open this URL to view your Streamlit app:
 NgrokTunnel: "https://f9f0-35-202-226-94.ngrok-free.app" -> "http://localhost:8501"


In [13]:
# NOTE(review): the app.py shown in this notebook hashes passwords with
# hashlib.sha256 and never imports bcrypt — confirm this install is still needed.
!pip install bcrypt

Collecting bcrypt
  Downloading bcrypt-4.3.0-cp39-abi3-manylinux_2_34_x86_64.whl.metadata (10 kB)
Downloading bcrypt-4.3.0-cp39-abi3-manylinux_2_34_x86_64.whl (284 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m284.2/284.2 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bcrypt
Successfully installed bcrypt-4.3.0
