In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from tensorflow.keras import layers, models

# Function to load and preprocess data
def load_and_preprocess_data(solar_wind_path, sunspots_path, labels_path, num_entries=100000):
    """Load the three CSV files, join them on a whole-day key and return (X, y).

    Args:
        solar_wind_path: Path of the CSV holding the feature columns and a
            'timedelta' column.
        sunspots_path: Path of the CSV holding a 'timedelta' column. Only its
            day key takes part in the join; none of its other columns are
            returned.
        labels_path: Path of the CSV holding 'timedelta' and 'dst' columns.
        num_entries: Maximum number of rows read from the top of each file
            (passed to ``pd.read_csv`` as ``nrows``).

    Returns:
        Tuple ``(X, y)``: ``X`` is a DataFrame with the selected feature
        columns and ``y`` is the matching 'dst' Series, both indexed by the
        rows of the joined table.

    Raises:
        KeyError: If a required column is missing from one of the files.
    """
    # Feature selection
    selected_features = ['bx_gse', 'by_gse', 'bz_gse', 'theta_gse','phi_gse', 'bx_gsm', 'by_gsm', 'bz_gsm', 'theta_gsm', 'phi_gsm', 'bt','density', 'speed', 'temperature', 'source']

    def with_day_key(df, keep):
        """Return only the `keep` columns of `df` plus the integer 'days' join key."""
        trimmed = df[keep].copy()
        trimmed['days'] = pd.to_timedelta(df['timedelta']).dt.days
        return trimmed

    # Keep only what the join and the outputs need. Carrying every CSV column
    # through both joins multiplies memory use and lets same-named columns
    # (e.g. 'timedelta') pick up _x/_y suffixes.
    solar_wind_data = with_day_key(pd.read_csv(solar_wind_path, nrows=num_entries), selected_features)
    sunspots_data = with_day_key(pd.read_csv(sunspots_path, nrows=num_entries), [])
    labels_data = with_day_key(pd.read_csv(labels_path, nrows=num_entries), ['dst'])

    # NOTE(review): the join key is the whole-day part of 'timedelta'. If a file
    # has more than one row per day this is a many-to-many join: every
    # solar-wind row is repeated for each sunspot row and each label row that
    # shares its day, so one feature row can be paired with several dst values.
    # Kept unchanged to preserve the existing outputs -- confirm whether a
    # finer-grained timestamp join is intended.
    merged_data = pd.merge(solar_wind_data, sunspots_data, on='days', how='inner')
    merged_data = pd.merge(merged_data, labels_data, on='days', how='inner')

    # Extract features and target variable
    X = merged_data[selected_features]
    y = merged_data['dst']

    return X, y

# Specify file paths
solar_wind_path = '/content/drive/MyDrive/aurora/Train_on_this/solar_wind.csv'
sunspots_path = '/content/drive/MyDrive/aurora/Train_on_this/sunspots_smooth.csv'
labels_path = '/content/drive/MyDrive/aurora/Train_on_this/labels(dst).csv'

# Load features and the dst target (the loader's default reads up to 100000 rows per file)
X, y = load_and_preprocess_data(solar_wind_path, sunspots_path, labels_path)

# Exclude non-numeric columns
numeric_columns = X.select_dtypes(include=['number']).columns
X_numeric = X[numeric_columns]

# Guard BEFORE splitting: the imputer/scaler and train_test_split all raise on
# inputs too small to leave a training row, so a check placed after the split
# can never reach the else branch. Two rows is the minimum that yields one
# training row and one test row with test_size=0.2.
if len(X_numeric) >= 2:
    # Split first so that imputation means and scaling statistics are learned
    # from the training rows only (fitting on all rows leaks test-set
    # information into training and makes the reported RMSE optimistic).
    # NOTE(review): this is a shuffled split; if rows are time-ordered or
    # duplicated by the day-level join in the loader, near-identical rows can
    # land on both sides -- confirm whether a chronological split is wanted.
    X_train_raw, X_test_raw, y_train, y_test = train_test_split(
        X_numeric, y, test_size=0.2, random_state=42
    )

    # Preprocessing: fit on the training split, then apply to the test split
    imputer = SimpleImputer(strategy='mean')
    scaler = StandardScaler()
    X_train = scaler.fit_transform(imputer.fit_transform(X_train_raw))
    X_test = scaler.transform(imputer.transform(X_test_raw))

    # Seed TensorFlow so weight initialisation and batch shuffling are
    # repeatable, matching the seeded split above
    tf.random.set_seed(42)

    # Define a simple neural network model
    model = models.Sequential([
        layers.Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
        layers.Dense(64, activation='relu'),
        layers.Dense(1)  # Output layer for regression
    ])

    # Compile the model
    model.compile(optimizer='adam', loss='mean_squared_error')

    # Train the model
    model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2)

    # Save the trained model
    model.save('/content/drive/MyDrive/aurora/trained_model1.h5')
    print("Model saved successfully.")

    # Evaluate the model on the test set
    predictions = model.predict(X_test).flatten()

    # RMSE as the square root of MSE: the `squared=False` keyword is deprecated
    # and removed in newer scikit-learn releases, while this form works on all.
    rmse = mean_squared_error(y_test, predictions) ** 0.5
    print(f'Root Mean Squared Error (RMSE): {rmse}')
else:
    print("Error: Empty training set.")


In [None]:

# Function to load and preprocess data
def load_and_preprocess_data1(solar_wind_path, sunspots_path,  num_entries=1000000):
    """Load the solar-wind and sunspot CSVs, join them on a whole-day key and return X.

    Label-free counterpart of the training loader, for data without a 'dst'
    column.

    Args:
        solar_wind_path: Path of the CSV holding the feature columns and a
            'timedelta' column.
        sunspots_path: Path of the CSV holding a 'timedelta' column. Only its
            day key takes part in the join; none of its other columns are
            returned.
        num_entries: Maximum number of rows read from the top of each file
            (passed to ``pd.read_csv`` as ``nrows``).

    Returns:
        DataFrame with the selected feature columns, one row per row of the
        joined table.

    Raises:
        KeyError: If a required column is missing from one of the files.
    """
    # Feature selection
    selected_features = ['bx_gse', 'by_gse', 'bz_gse', 'theta_gse','phi_gse', 'bx_gsm', 'by_gsm', 'bz_gsm', 'theta_gsm', 'phi_gsm', 'bt','density', 'speed', 'temperature', 'source']

    def with_day_key(df, keep):
        """Return only the `keep` columns of `df` plus the integer 'days' join key."""
        trimmed = df[keep].copy()
        trimmed['days'] = pd.to_timedelta(df['timedelta']).dt.days
        return trimmed

    # Keep only what the join and the output need. Carrying every CSV column
    # through the join wastes memory and lets same-named columns
    # (e.g. 'timedelta') pick up _x/_y suffixes.
    solar_wind_data = with_day_key(pd.read_csv(solar_wind_path, nrows=num_entries), selected_features)
    sunspots_data = with_day_key(pd.read_csv(sunspots_path, nrows=num_entries), [])

    # NOTE(review): the join key is the whole-day part of 'timedelta'. Solar-wind
    # rows whose day has no sunspot row are dropped, and rows are repeated when
    # several sunspot rows share a day. Kept unchanged to preserve the existing
    # output -- confirm whether this filtering/duplication is intended.
    merged_data = pd.merge(solar_wind_data, sunspots_data, on='days', how='inner')

    # Extract features
    X = merged_data[selected_features]

    return X

In [None]:
from tensorflow.keras.models import load_model

solar_wind_path = '/content/drive/MyDrive/aurora/Dont_Train_on_this/solar_wind.csv'
sunspots_path = '/content/drive/MyDrive/aurora/Dont_Train_on_this/sunspots_smooth.csv'

# Load and preprocess data with 100000 entries
X = load_and_preprocess_data1(solar_wind_path, sunspots_path, num_entries=100000)

# Exclude non-numeric columns
numeric_columns = X.select_dtypes(include=['number']).columns
X_numeric = X[numeric_columns]

# Reuse the `imputer` and `scaler` already fitted in the training cell instead
# of fitting fresh ones here: re-fitting on the inference data would scale the
# inputs with different means/variances than the network was trained on.
# This cell therefore has to run after the training cell in the same kernel.
X_scaled = scaler.transform(imputer.transform(X_numeric))

# Load the trained model. The training cell saves to 'trained_model1.h5'; the
# previous 'trained_model.h5' path is not written anywhere in this notebook.
model = load_model('/content/drive/MyDrive/aurora/trained_model1.h5')

# Make predictions
predictions = model.predict(X_scaled).flatten()

# Create a DataFrame with predictions, aligned row-for-row with X
predictions_df = pd.DataFrame({'Predictions': predictions}, index=X.index)

# Save predictions to a new file
predictions_df.to_csv('/content/drive/MyDrive/aurora/predictions.csv')