In [4]:
# Data manipulation and analysis
import pandas as pd
import numpy as np

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# For handling dates and times
from datetime import datetime

# Optional: silence every warning category for the rest of the session so
# library deprecation notices do not clutter the output below.
import warnings
warnings.simplefilter('ignore')

# Load dataset. The path is relative to the current working directory, so the
# CSV must sit next to wherever this code is run from.
data_path = 'stock_data.csv'
try:
    df = pd.read_csv(data_path)
except FileNotFoundError as err:
    # Same exception type as before, but with a message that says what to do.
    raise FileNotFoundError(
        f"'{data_path}' was not found in the current working directory; "
        "place the CSV there or update data_path."
    ) from err

# Display the first 5 rows. A bare `df.head()` only renders when it is the
# last expression of a notebook cell, so print it explicitly.
print(df.head())

# Check the data types and for any missing values (info() prints by itself)
df.info()


# Check for duplicates (fully identical rows)
duplicates = df.duplicated().sum()
print(f'Number of duplicates: {duplicates}')

# Remove duplicates if present
df = df.drop_duplicates()

# Handle missing values: drop any row that has a missing value in any column
# (or impute missing values instead, as needed)
df = df.dropna()

# Convert date column to datetime so it can be sorted and plotted as a time axis
df['Date'] = pd.to_datetime(df['Date'])

# Put the rows in chronological order. pct_change() and rolling() further down
# work on row order, so an unsorted or newest-first file would give wrong
# returns and moving averages. mergesort is stable: rows sharing a date keep
# their original relative order.
df = df.sort_values('Date', kind='mergesort').reset_index(drop=True)

# Check the cleaned data
df.info()


# Basic descriptive statistics. Printed explicitly: a bare expression in the
# middle of a cell (or in a plain script) is evaluated and then discarded.
print(df.describe())


# Closing price as a time series, drawn on an explicit Axes object
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df['Date'], df['Close'], label='Closing Price')
ax.set_xlabel('Date')
ax.set_ylabel('Price (USD)')
ax.set_title('Stock Price Over Time')
ax.legend()
ax.grid(True)
plt.show()


# Daily return = fractional change of the close versus the previous row.
# The first row has no predecessor, so its value is NaN.
df['Daily Return'] = df['Close'].pct_change()

# Histogram (50 bins) of the defined returns with a KDE curve overlaid
daily_returns = df['Daily Return'].dropna()
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(daily_returns, bins=50, kde=True, ax=ax)
ax.set_title('Distribution of Daily Returns')
ax.set_xlabel('Daily Return')
ax.set_ylabel('Frequency')
plt.show()


# Annualized volatility: standard deviation of the daily returns scaled by the
# square root of the number of trading days assumed per year (252).
trading_days = 252
daily_std = df['Daily Return'].std()
volatility = daily_std * np.sqrt(trading_days)
print(f'Annualized Volatility: {volatility:.4f}')


# Pairwise correlation between the price columns and volume
numeric_cols = ['Open', 'High', 'Low', 'Close', 'Volume']
correlation_matrix = df[numeric_cols].corr()

# Annotated heatmap of the correlation matrix on an explicit Axes object
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(
    correlation_matrix,
    annot=True,
    cmap='coolwarm',
    linewidths=0.5,
    ax=ax,
)
ax.set_title('Correlation Matrix')
plt.show()


# Rolling means of the close over 50 and 200 rows. Each column stays NaN until
# a full window of rows is available.
for window in (50, 200):
    df[f'{window}-Day MA'] = df['Close'].rolling(window=window).mean()

# Overlay both moving averages on the closing price
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df['Date'], df['Close'], label='Closing Price')
for window, line_color in ((50, 'orange'), (200, 'green')):
    ax.plot(
        df['Date'],
        df[f'{window}-Day MA'],
        label=f'{window}-Day Moving Average',
        color=line_color,
    )
ax.set_xlabel('Date')
ax.set_ylabel('Price (USD)')
ax.set_title('Stock Price and Moving Averages')
ax.legend()
ax.grid(True)
plt.show()


FileNotFoundError: [Errno 2] No such file or directory: 'stock_data.csv'