In [9]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from scipy import stats
from scipy.stats import norm, skew


from sklearn.model_selection import train_test_split, KFold, GroupKFold, GridSearchCV, StratifiedKFold
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler, MinMaxScaler, RobustScaler
from sklearn.metrics import *

import sys, os
import random

# Silence every warning for this session, but only when the interpreter was
# started without explicit -W options (sys.warnoptions is empty in that case),
# so a user-supplied warning configuration is never overridden.
if not sys.warnoptions:
    import warnings
    warnings.simplefilter("ignore")

from IPython import display, utils

ModuleNotFoundError: No module named 'scipy'

In [None]:
# Load the four raw CSV tables under descriptive names. The exploration cells
# further down read `solar_wind`, so that name must be bound here for the
# notebook to survive Restart Kernel -> Run All.
labels = pd.read_csv("labels.csv")
satellite_pos = pd.read_csv("satellite_pos.csv")
solar_wind = pd.read_csv("solar_wind.csv")
sunspots = pd.read_csv("sunspots.csv")

# Keep the original positional names as aliases (same objects, no copies) so
# cells that still refer to data1..data4 keep working.
data1, data2, data3, data4 = labels, satellite_pos, solar_wind, sunspots

# Display the column names of each table, in load order.
for frame in (data1, data2, data3, data4):
    print(frame.columns)

In [None]:
# Preview the first rows of data1 (the table read from labels.csv).
data1.head()

In [None]:
# Preview the first rows of data2 (the table read from satellite_pos.csv).
data2.head()

In [None]:
# Preview the first rows of data3 (the table read from solar_wind.csv).
data3.head()

In [None]:
# Preview the first rows of data4 (the table read from sunspots.csv).
data4.head()

In [None]:
# Number of rows in the solar-wind table.
len(solar_wind)

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) per column.
solar_wind.describe()

In [None]:
# Headline statistics for the `speed` column. describe() is evaluated once on
# that single column instead of four times on the whole frame; the printed
# labels and values are unchanged.
speed_stats = solar_wind["speed"].describe()
print("AVG Speed: ", speed_stats["mean"])
print("MAX Speed: ", speed_stats["max"])
print("MIN Speed: ", speed_stats["min"])
print("STD Speed: ", speed_stats["std"])

In [None]:
# Box-and-whisker plot of the `speed` column; the returned plot object is kept
# in `boxplot` so the cell does not echo its repr.
boxplot = solar_wind.boxplot(column=['speed'])

In [None]:
# Summary statistics computed separately for each value of `period`.
solar_wind.groupby("period").describe()

In [None]:
# Inspect the dtype pandas assigned to each column of the solar-wind table.
solar_wind.dtypes

In [None]:
# Per-period summary statistics, transposed so that each period becomes a
# column, then printed in full via to_string() (no row/column truncation).
period_summary = solar_wind.groupby(['period']).describe()
t1 = period_summary.T
print(t1.to_string())

In [None]:
# For every object-dtype column, print how many rows fall into each distinct
# value, followed by a blank separator line.
is_object_column = solar_wind.dtypes == object
object_columns = solar_wind.columns[is_object_column].tolist()
for col in object_columns:
    value_counts = solar_wind.groupby([col])[col].count()
    print(value_counts, '\n')

In [None]:
# Report the (rows, columns) shape, then preview the first rows of the table.
print("Solar wind shape: ", solar_wind.shape)
solar_wind.head()

In [None]:
### Visualizations
# Select matplotlib as the pandas plotting backend for the plots that follow.
pd.options.plotting.backend = "matplotlib"

In [None]:
# (rows, columns) of the solar-wind table.
solar_wind.shape

In [None]:
# Count missing values per column.
solar_wind.isnull().sum()

In [None]:
# Histogram of each column with 20 bins on a 20x10-inch figure; the trailing
# semicolon suppresses the repr of the returned axes.
solar_wind.hist(bins = 20, figsize = (20,10));

In [None]:
# Time series plot: one figure per bx/by/bz GSE column, restricted to the rows
# whose period is 'train_a' and drawn against the frame's index.
train_a_rows = solar_wind[solar_wind.period == 'train_a']
for col in ('bx_gse', 'by_gse', 'bz_gse'):
    fig = train_a_rows.plot(y=[col]);

In [None]:
# Box plot over the `bx_gse` / `period` sub-frame.
# NOTE(review): `period` is compared against the string 'train_a' elsewhere, so
# it is presumably non-numeric and likely contributes no box here — confirm.
solar_wind[['bx_gse', 'period']].boxplot()

In [None]:
# Box plot of `bx_gse` with one box per value of `period`.
solar_wind[['bx_gse', 'period']].boxplot(by = 'period')

In [None]:
# Pairwise scatter-plot matrix, limited to the first 100 rows of the table.
sns.pairplot(solar_wind.iloc[:100])

In [None]:
# Correlation plot
# The original cell only referenced `sns.heatmap` without calling it, so
# nothing was computed or drawn. Correlation needs numeric input, so columns
# that are not numeric are dropped before computing pairwise correlations.
cor = solar_wind.select_dtypes(include="number").corr()
# Fix the colour scale to the full [-1, 1] correlation range so colours are
# comparable between runs; the trailing semicolon hides the Axes repr.
sns.heatmap(cor, vmin=-1, vmax=1, center=0, cmap="coolwarm");

In [None]:
# Switch the pandas plotting backend to plotly and collect the names of all
# columns whose dtype is not `object`.
pd.options.plotting.backend = "plotly"
is_non_object = solar_wind.dtypes != object
num_cols = solar_wind.columns[is_non_object].tolist()

In [None]:
from pptx import Presentation
from pptx.util import Inches

# Read the labels table and run its `timedelta` column through pd.to_timedelta
# in one chained expression.
csv_file_path = 'labels.csv'
data = pd.read_csv(csv_file_path).assign(
    timedelta=lambda frame: pd.to_timedelta(frame['timedelta'])
)

# White-grid seaborn styling for the charts rendered for the slides.
sns.set(style="whitegrid")

# Start from an empty deck; slides are appended to `prs` one section at a time.
prs = Presentation()

# Slide 1: title slide (layout 0) with a title and a subtitle placeholder.
title_layout = prs.slide_layouts[0]
slide = prs.slides.add_slide(title_layout)
title, subtitle = slide.shapes.title, slide.placeholders[1]
title.text = "Analysis of Dataset"
subtitle.text = "A concise overview\nYour Name\nDate"

# Slide 2: dataset overview (layout 1) with a title and a body placeholder.
overview_lines = [
    "This dataset contains the following columns:\n",
    "- Period: Different phases (e.g., train_a).\n",
    "- Timedelta: Time intervals.\n",
    "- DST: Some measured value.\n",
]
content_layout = prs.slide_layouts[1]
slide = prs.slides.add_slide(content_layout)
title, content = slide.shapes.title, slide.placeholders[1]
title.text = "Dataset Overview"
content.text = "".join(overview_lines)

# Slide 3: histogram (30 bins, with KDE overlay) of the `dst` column, rendered
# to a PNG through the explicit figure/axes interface and closed afterwards.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(data['dst'], bins=30, kde=True, color='blue', ax=ax)
ax.set_title('Distribution of DST Values')
ax.set_xlabel('DST')
ax.set_ylabel('Frequency')
fig.savefig('dst_distribution.png')
plt.close(fig)

# Title-only layout (5); the saved chart is placed 1in from the left and 1.5in
# from the top, scaled to 8in wide.
slide = prs.slides.add_slide(prs.slide_layouts[5])
title = slide.shapes.title
title.text = "Distribution of DST Values"
left, top = Inches(1), Inches(1.5)
pic = slide.shapes.add_picture('dst_distribution.png', left, top, width=Inches(8))

# Slide 4: Time-based Analysis - Line chart of DST over timedelta
# matplotlib has no tick formatter for timedelta64 values, so plotting the raw
# column gives an unreadable x-axis. The elapsed time is therefore converted
# to fractional days and the axis label states the unit.
SECONDS_PER_DAY = 86400
fig, ax = plt.subplots(figsize=(12, 6))
for period in data['period'].unique():
    period_data = data[data['period'] == period]
    elapsed_days = period_data['timedelta'].dt.total_seconds() / SECONDS_PER_DAY
    ax.plot(elapsed_days, period_data['dst'], label=period)
ax.set_title('DST Changes Over Time')
ax.set_xlabel('Timedelta (days)')
ax.set_ylabel('DST')
ax.legend(title='Period')
fig.savefig('dst_over_time.png')
# Close this specific figure so it is not kept alive by the kernel.
plt.close(fig)

# Title-only layout (5); the saved chart is placed 1in from the left and 1.5in
# from the top, scaled to 8in wide.
slide = prs.slides.add_slide(prs.slide_layouts[5])
title = slide.shapes.title
title.text = "DST Changes Over Time"
left = Inches(1)
top = Inches(1.5)
pic = slide.shapes.add_picture('dst_over_time.png', left, top, width=Inches(8))

# Slide 5: bar chart of the mean `dst` value for each `period`.
mean_dst_by_period = data.groupby('period')['dst'].mean()
average_dst = mean_dst_by_period.reset_index()

fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x='period', y='dst', data=average_dst, palette='viridis', ax=ax)
ax.set_title('Average DST Values by Period')
ax.set_xlabel('Period')
ax.set_ylabel('Average DST')
fig.savefig('average_dst.png')
plt.close(fig)

# Title-only layout (5); the saved chart is placed 1in from the left and 1.5in
# from the top, scaled to 8in wide.
slide = prs.slides.add_slide(prs.slide_layouts[5])
title = slide.shapes.title
title.text = "Average DST Values by Period"
left, top = Inches(1), Inches(1.5)
pic = slide.shapes.add_picture('average_dst.png', left, top, width=Inches(8))

# Slide 6: closing slide (layout 1) summarising findings and next steps.
conclusion_lines = [
    "Summary of key findings:\n",
    "- Noticeable trends in DST over time.\n",
    "- Significant differences between periods.\n\n",
    "Final thoughts and possible next steps for further analysis:\n",
    "- Detailed time series analysis to understand underlying patterns.\n",
    "- Investigate correlation with external factors.\n",
    "- Segment data for more granular analysis.\n",
    "- Develop predictive models to forecast future DST values.\n",
]
slide = prs.slides.add_slide(prs.slide_layouts[1])
title, content = slide.shapes.title, slide.placeholders[1]
title.text = "Key Insights & Conclusion"
content.text = "".join(conclusion_lines)

# Write the finished deck to disk.
prs.save('analysis_presentation.pptx')