In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Exploratory analysis of a stock-price CSV: print missing-value counts and
# summary statistics, then save three figures (price history, volume
# distribution, correlation heatmap) as PNG files in the working directory.

# Load the dataset
file_path = 'path_to_your_file/stocks.csv'  # Change this to your actual file path
stocks_df = pd.read_csv(file_path)

# Convert Date to datetime format, then order the rows chronologically.
# Line plots connect points in row order, so an unsorted file would draw
# lines that jump back and forth along the time axis. A stable sort keeps the
# original relative order of rows that share the same date.
stocks_df['Date'] = pd.to_datetime(stocks_df['Date'])
stocks_df = stocks_df.sort_values('Date', kind='stable').reset_index(drop=True)

# 1. Checking for missing data (per-column count of null cells)
missing_data = stocks_df.isnull().sum()
print("Missing Data:\n", missing_data)

# 2. Statistical summary of the dataset
print("\nStatistical Summary:\n", stocks_df.describe())

# 3. Plotting stock prices over time for Open, Close, High, Low
# Explicit figure/axes handles are used throughout so that every savefig call
# writes the figure it belongs to instead of relying on pyplot's implicit
# "current figure".
price_fig, price_ax = plt.subplots(figsize=(10, 6))
for price_column in ('Open', 'Close', 'High', 'Low'):
    price_ax.plot(stocks_df['Date'], stocks_df[price_column], label=price_column)
price_ax.set_title('Stock Prices (Open, Close, High, Low) Over Time')
price_ax.set_xlabel('Date')
price_ax.set_ylabel('Price')
price_ax.legend()
price_ax.grid(True)
price_ax.tick_params(axis='x', labelrotation=45)
price_fig.tight_layout()
price_fig.savefig('stock_prices_over_time.png')  # Save plot as PNG

# 4. Distribution of stock volume (30-bin histogram with a KDE overlay)
volume_fig, volume_ax = plt.subplots(figsize=(10, 6))
sns.histplot(stocks_df['Volume'], bins=30, kde=True, ax=volume_ax)
volume_ax.set_title('Distribution of Stock Volume')
volume_ax.set_xlabel('Volume')
volume_ax.set_ylabel('Frequency')
volume_ax.grid(True)
volume_fig.tight_layout()
volume_fig.savefig('stock_volume_distribution.png')  # Save plot as PNG

# 5. Correlation heatmap of the price and volume columns
heatmap_fig, heatmap_ax = plt.subplots(figsize=(10, 6))
corr = stocks_df[['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']].corr()
sns.heatmap(corr, annot=True, cmap='coolwarm', linewidths=0.5, ax=heatmap_ax)
heatmap_ax.set_title('Correlation Heatmap')
heatmap_fig.tight_layout()
heatmap_fig.savefig('correlation_heatmap.png')  # Save plot as PNG

# Display all three figures once everything has been saved.
plt.show()
