# In [None]:
# Importing necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Exploratory analysis of the solar radiation measurement CSV: prints a
# preview, summary statistics and missing-value counts, then draws time-series,
# correlation, histogram, box and scatter plots, and finally drops the
# 'Comments' column.

# Load the data.
# 'Timestamp' is parsed at load time so the time-series plots below get a real
# date axis; left as plain strings, matplotlib treats every distinct value as
# a separate category on the x-axis.
df = pd.read_csv('SolarRadiationMeasurementData.csv', parse_dates=['Timestamp'])

# Display the first few rows of the dataframe
print(df.head())

# Summary Statistics
print(df.describe())

# Data Quality Check
print(df.isnull().sum())  # Check for missing values

# Time Series Analysis
plt.figure(figsize=(10, 5))
plt.plot(df['Timestamp'], df['GHI'])
plt.title('GHI over time')
plt.xlabel('Timestamp')
plt.ylabel('GHI')
plt.show()

# Correlation Analysis
# Correlate numeric columns only: DataFrame.corr() raises on non-numeric
# columns (such as the timestamp) in pandas >= 2.0, where numeric_only
# defaults to False. select_dtypes keeps this working on older pandas too.
numeric_columns = df.select_dtypes(include='number')
correlation = numeric_columns.corr()
plt.figure(figsize=(10, 8))
sns.heatmap(correlation, annot=True)
plt.title('Correlation heatmap')
plt.show()

# Wind Analysis
plt.figure(figsize=(10, 5))
plt.plot(df['Timestamp'], df['WS'])
plt.title('Wind Speed over time')
plt.xlabel('Timestamp')
plt.ylabel('Wind Speed')
plt.show()

# Temperature Analysis
plt.figure(figsize=(10, 5))
plt.plot(df['Timestamp'], df['Tamb'], label='Ambient Temperature')
plt.plot(df['Timestamp'], df['TModA'], label='Module A Temperature')
plt.plot(df['Timestamp'], df['TModB'], label='Module B Temperature')
plt.title('Temperature over time')
plt.xlabel('Timestamp')
plt.ylabel('Temperature')
plt.legend()
plt.show()

# Histograms
df[['GHI', 'DNI', 'DHI', 'WS', 'Tamb']].hist(bins=30, figsize=(15, 10))
plt.show()

# Box Plots
plt.figure(figsize=(10, 5))
sns.boxplot(data=df[['GHI', 'DNI', 'DHI']])
plt.title('Box plot of Solar Radiation Data')
plt.show()

# Scatter Plots
plt.figure(figsize=(10, 5))
plt.scatter(df['GHI'], df['Tamb'])
plt.title('Scatter plot of GHI vs Tamb')
plt.xlabel('GHI')
plt.ylabel('Tamb')
plt.show()

# Data Cleaning
# NOTE(review): the column is said to be entirely null; confirm against the
# missing-value counts printed above before relying on this.
df = df.drop(columns=['Comments'])  # Drop the 'Comments' column as it is entirely null