<a href="https://colab.research.google.com/github/sameer23MCS120/Image-Similarity-Detection-Using-Siamese-Neural-Networks/blob/main/assingnment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:
pip install pandas numpy scikit-learn matplotlib seaborn



In [13]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import matplotlib.pyplot as plt
import seaborn as sns

class BatteryDataGenerator:
    """Generate a synthetic, reproducible dual-battery telemetry dataset.

    Every sample contains voltage, current and temperature readings for a
    "fixed" and a "portable" battery, two binary status flags, a timestamp
    and a derived ``Effective_SOC`` column.
    """

    def __init__(self, n_samples=1000, seed=42):
        """Store the sample count and create a private random stream.

        Args:
            n_samples: Number of rows produced by ``generate_data``.
            seed: Seed of the generator's own random stream. The default
                keeps the previously hard-coded seed of 42.
        """
        self.n_samples = n_samples
        # A private legacy RandomState produces the same draws that
        # ``np.random.seed(seed)`` followed by ``np.random.*`` calls would,
        # but leaves NumPy's process-wide generator untouched.
        self._rng = np.random.RandomState(seed)

    def generate_data(self):
        """Generate realistic battery dataset.

        Returns:
            A DataFrame with ``n_samples`` rows: six clipped, normally
            distributed sensor columns, two 0/1 flag columns, a
            ``Timestamp`` column spaced 15 minutes apart starting at
            2024-01-01, and the derived ``Effective_SOC`` column.
        """
        rng = self._rng
        n = self.n_samples

        start_date = datetime(2024, 1, 1)
        timestamps = [start_date + timedelta(minutes=i * 15) for i in range(n)]

        # The draw order below determines which random numbers each column
        # receives; keep it stable so the dataset stays reproducible.
        data = {
            'Fixed_Battery_Voltage': rng.normal(12.5, 0.5, n).clip(11.0, 14.0),
            'Fixed_Battery_Current': rng.normal(2.0, 1.0, n).clip(0, 5.0),
            'Fixed_Battery_Temperature': rng.normal(35, 5, n).clip(20, 45),
            'Portable_Battery_Voltage': rng.normal(11.8, 0.8, n).clip(10.0, 13.5),
            'Portable_Battery_Current': rng.normal(1.5, 1.2, n).clip(0, 4.5),
            'Portable_Battery_Temperature': rng.normal(38, 6, n).clip(20, 50),
            'Motor_Status': rng.choice([0, 1], n, p=[0.7, 0.3]),
            'BCM_Battery_Selected': rng.choice([0, 1], n, p=[0.6, 0.4]),
            'Timestamp': timestamps,
        }

        df = pd.DataFrame(data)
        # The SOC formula is written with element-wise operations, so the
        # whole frame is evaluated in one pass instead of row-by-row apply.
        df['Effective_SOC'] = self._calculate_effective_soc(df)
        return df

    def _calculate_effective_soc(self, row):
        """Calculate effective SOC based on battery parameters.

        Args:
            row: Anything indexable by column name: a single row (Series or
                dict of scalars) or a whole DataFrame, in which case the
                calculation is applied element-wise.

        Returns:
            The SOC clamped to the range 0..100: a scalar for a single row,
            a Series for a DataFrame.
        """
        # Voltage-based estimate: each voltage is normalised over its clip
        # range, then weighted 60 % (fixed) / 40 % (portable).
        base_soc = 100 * (
            (row['Fixed_Battery_Voltage'] - 11.0) / (14.0 - 11.0) * 0.6 +
            (row['Portable_Battery_Voltage'] - 10.0) / (13.5 - 10.0) * 0.4
        )

        # Only the part of each temperature above 40 is penalised.
        temp_penalty = 0.1 * (
            np.maximum(0, row['Fixed_Battery_Temperature'] - 40) +
            np.maximum(0, row['Portable_Battery_Temperature'] - 40)
        )

        current_penalty = 0.05 * (
            row['Fixed_Battery_Current'] +
            row['Portable_Battery_Current']
        )

        # Flat deduction of 5 whenever Motor_Status equals 1.
        motor_impact = 5 * (row['Motor_Status'] == 1)

        soc = base_soc - temp_penalty - current_penalty - motor_impact
        return np.minimum(100, np.maximum(0, soc))

class BatterySOCAnalyzer:
    """Correlation, KPI and random-forest analysis of a battery DataFrame."""

    def __init__(self):
        # Predictor columns, in the order they are handed to the model.
        self.feature_columns = [
            'Fixed_Battery_Voltage',
            'Portable_Battery_Voltage',
            'Fixed_Battery_Current',
            'Portable_Battery_Current',
            'Motor_Status',
            'BCM_Battery_Selected',
            'Portable_Battery_Temperature',
            'Fixed_Battery_Temperature',
        ]
        self.target_column = 'Effective_SOC'
        self.scaler = StandardScaler()
        # Both are populated by _train_and_evaluate_model().
        self.model = None
        self.feature_importance = None

    def analyze_data(self, df):
        """Run every analysis step on ``df`` and bundle the outcomes.

        The frame is kept on the instance as ``self.df``. Returns a dict
        with the keys 'correlations', 'kpis', 'model_metrics' and
        'feature_importance'.
        """
        self.df = df
        correlations = self._analyze_correlations()
        kpis = self._calculate_kpis()
        # Training must run before feature_importance is read below,
        # because that attribute is filled in as a side effect of training.
        model_metrics = self._train_and_evaluate_model()
        return {
            'correlations': correlations,
            'kpis': kpis,
            'model_metrics': model_metrics,
            'feature_importance': self.feature_importance,
        }

    def _analyze_correlations(self):
        """Return each feature's correlation with the target, largest first."""
        target = self.df[self.target_column]
        per_feature = {
            name: self.df[name].corr(target)
            for name in self.feature_columns
        }
        return pd.Series(per_feature).sort_values(ascending=False)

    def _calculate_kpis(self):
        """Compute the KPI dict; each entry has a 'value' and a 'description'."""
        frame = self.df
        soc = frame['Effective_SOC']

        # Count of rows whose fixed-battery current is strictly positive.
        positive_current_rows = int((frame['Fixed_Battery_Current'] > 0).sum())
        mean_soc = soc.mean()
        temperature_corr = frame['Portable_Battery_Temperature'].corr(soc)
        voltage_ratio = frame['Portable_Battery_Voltage'].div(
            frame['Fixed_Battery_Voltage']
        ).mean()

        return {
            'Charge_Cycles': {
                'value': positive_current_rows,
                'description': 'Number of charging cycles',
            },
            'Average_SOC': {
                'value': mean_soc,
                'description': 'Average State of Charge',
            },
            'Temperature_Impact': {
                'value': temperature_corr,
                'description': 'Temperature correlation with SOC',
            },
            'Voltage_Efficiency': {
                'value': voltage_ratio,
                'description': 'Average voltage efficiency',
            },
        }

    def _train_and_evaluate_model(self):
        """Grid-search a random forest and score it on a 20 % hold-out split.

        Stores the best estimator in ``self.model`` and a sorted importance
        table in ``self.feature_importance``. Returns a dict with the keys
        'R2_Score', 'RMSE', 'MAE' and 'Best_Parameters'.
        """
        features = self.df[self.feature_columns]
        target = self.df[self.target_column]

        X_train, X_test, y_train, y_test = train_test_split(
            features, target, test_size=0.2, random_state=42
        )

        # The scaler is fitted on the training split only and then reused
        # for the test split.
        train_scaled = self.scaler.fit_transform(X_train)
        test_scaled = self.scaler.transform(X_test)

        search_space = {
            'n_estimators': [100, 200],
            'max_depth': [10, 20, None],
            'min_samples_split': [2, 5],
        }
        search = GridSearchCV(
            RandomForestRegressor(random_state=42),
            search_space,
            cv=5,
            scoring='neg_mean_squared_error',
        )
        search.fit(train_scaled, y_train)

        self.model = search.best_estimator_
        predictions = self.model.predict(test_scaled)

        importance_table = pd.DataFrame({
            'feature': self.feature_columns,
            'importance': self.model.feature_importances_,
        })
        self.feature_importance = importance_table.sort_values(
            by='importance', ascending=False
        )

        mse = mean_squared_error(y_test, predictions)
        return {
            'R2_Score': r2_score(y_test, predictions),
            'RMSE': np.sqrt(mse),
            'MAE': mean_absolute_error(y_test, predictions),
            'Best_Parameters': search.best_params_,
        }

    def visualize_results(self, analysis_results):
        """Draw a 2x2 summary figure and save it as a PNG.

        Reads ``self.df`` and ``self.feature_importance``; the
        ``analysis_results`` argument is accepted but not consulted.
        Writes 'battery_analysis_results.png' and closes the figure.
        """
        plt.figure(figsize=(15, 10))

        # Top-left: feature importances as horizontal bars.
        plt.subplot(2, 2, 1)
        sns.barplot(x='importance', y='feature', data=self.feature_importance)
        plt.title('Feature Importance')

        # Top-right: pairwise correlations of all features plus the target.
        plt.subplot(2, 2, 2)
        heatmap_columns = self.feature_columns + [self.target_column]
        correlation_matrix = self.df[heatmap_columns].corr()
        sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
        plt.title('Correlation Heatmap')

        # Bottom-left: histogram of the target.
        plt.subplot(2, 2, 3)
        sns.histplot(self.df['Effective_SOC'], bins=30)
        plt.title('SOC Distribution')

        # Bottom-right: portable-battery temperature against the target.
        plt.subplot(2, 2, 4)
        sns.scatterplot(data=self.df, x='Portable_Battery_Temperature', y='Effective_SOC')
        plt.title('Temperature vs SOC')

        plt.tight_layout()
        plt.savefig('battery_analysis_results.png')
        plt.close()

def main():
    """Run the demo pipeline: generate data, analyse it, report and save.

    Prints progress and results to stdout, asks the analyzer to write the
    figure 'battery_analysis_results.png', and writes the dataset to
    'battery_data.csv' in the working directory.
    """
    # Build the synthetic dataset.
    print("Generating battery dataset...")
    battery_df = BatteryDataGenerator(n_samples=1000).generate_data()

    # Correlations, KPIs and model training all happen in analyze_data.
    print("Performing analysis...")
    soc_analyzer = BatterySOCAnalyzer()
    report = soc_analyzer.analyze_data(battery_df)

    # Console report.
    print("\nAnalysis Results:")
    print("\nModel Performance:")
    model_metrics = report['model_metrics']
    for name in model_metrics:
        print(f"{name}: {model_metrics[name]}")

    print("\nTop Feature Importance:")
    print(report['feature_importance'])

    print("\nKey Performance Indicators:")
    for kpi_name, entry in report['kpis'].items():
        value, description = entry['value'], entry['description']
        print(f"{kpi_name}: {value:.4f} - {description}")

    # Figure output.
    soc_analyzer.visualize_results(report)
    print("\nVisualizations have been saved to 'battery_analysis_results.png'")

    # Persist the generated rows without the index column.
    battery_df.to_csv('battery_data.csv', index=False)
    print("Dataset has been saved to 'battery_data.csv'")

# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()

Generating battery dataset...
Performing analysis...

Analysis Results:

Model Performance:
R2_Score: 0.9781998226112396
RMSE: 1.9035212858389188
MAE: 1.4113705869532123
Best_Parameters: {'max_depth': 10, 'min_samples_split': 2, 'n_estimators': 200}

Top Feature Importance:
                        feature  importance
0         Fixed_Battery_Voltage    0.532438
1      Portable_Battery_Voltage    0.443324
4                  Motor_Status    0.010078
7     Fixed_Battery_Temperature    0.003959
6  Portable_Battery_Temperature    0.003570
3      Portable_Battery_Current    0.003019
2         Fixed_Battery_Current    0.002992
5          BCM_Battery_Selected    0.000619

Key Performance Indicators:
Charge_Cycles: 982.0000 - Number of charging cycles
Average_SOC: 48.8466 - Average State of Charge
Temperature_Impact: 0.0014 - Temperature correlation with SOC
Voltage_Efficiency: 0.9435 - Average voltage efficiency

Visualizations have been saved to 'battery_analysis_results.png'
Dataset has been 