
# Financial Data Analysis Basics

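This notebook covers the basics of financial data analysis: data cleaning and preparation, descriptive statistics, visualization, daily returns, and time series analysis. The code cells reference a pandas DataFrame named `data` with 'Date', 'Open', 'Close', and 'Volume' columns; it is not loaded in the cells shown here, so load it before uncommenting and running them.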
    

In [None]:

# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
    


## Data Cleaning and Preparation

- Display the first few rows of the dataset.
- Check for missing values and duplicates.
- Convert the 'Date' column to datetime.
    

In [None]:

# NOTE(review): every statement in this cell is commented out, so running the
# cell does nothing. The statements reference a DataFrame named `data` that is
# not created in the visible notebook — load it before uncommenting.

# Display the first few rows
# NOTE(review): only a cell's last expression is rendered automatically; as a
# mid-cell expression this head() call would produce no visible output.
# data.head()

# Check for missing values
# Per-column count of null entries; the result is stored but never displayed.
# missing_values = data.isnull().sum()

# Check for duplicates
# Number of rows flagged as duplicates of an earlier row; stored but never
# displayed.
# duplicates = data.duplicated().sum()

# Format the 'Date' column
# Overwrites the existing 'Date' column with its datetime-parsed version.
# data['Date'] = pd.to_datetime(data['Date'])
    


## Descriptive Statistics

Calculate basic statistics for numerical columns and visualize the data distribution using histograms and box plots.
    

In [None]:

# NOTE(review): every statement in this cell is commented out, so running the
# cell does nothing. The statements reference a DataFrame named `data` that is
# not created in the visible notebook — load it before uncommenting.

# Calculate basic statistics
# descriptive_stats = data.describe()

# Calculate variance
# NOTE(review): on pandas >= 2.0, var() defaults to numeric_only=False and may
# raise a TypeError if 'Date' has already been converted to datetime; consider
# data.var(numeric_only=True) — confirm against the pandas version in use.
# variance = data.var()

# Add variance to the descriptive statistics summary
# Appends a row labelled 'variance' to the describe() table.
# descriptive_stats.loc['variance'] = variance

# Set the aesthetics for plots
# NOTE(review): seaborn documents set() as an alias of set_theme(), which is
# the preferred spelling.
# sns.set(style="whitegrid")

# Creating histograms
# One figure with three side-by-side panels (1 row x 3 columns): Open, Close,
# and Volume, each with 30 bins and a KDE overlay.
# plt.figure(figsize=(15, 5))
# plt.subplot(1, 3, 1)
# sns.histplot(data['Open'], bins=30, kde=True)
# plt.title('Histogram of Open Prices')
# plt.subplot(1, 3, 2)
# sns.histplot(data['Close'], bins=30, kde=True)
# plt.title('Histogram of Close Prices')
# plt.subplot(1, 3, 3)
# sns.histplot(data['Volume'], bins=30, kde=True, color='green')
# plt.title('Histogram of Volume')
# plt.tight_layout()
# plt.show()

# Creating box plots
# Same 1 x 3 layout as the histograms, with each column drawn as a vertical
# box plot.
# plt.figure(figsize=(15, 5))
# plt.subplot(1, 3, 1)
# sns.boxplot(y=data['Open'])
# plt.title('Box Plot of Open Prices')
# plt.subplot(1, 3, 2)
# sns.boxplot(y=data['Close'])
# plt.title('Box Plot of Close Prices')
# plt.subplot(1, 3, 3)
# sns.boxplot(y=data['Volume'], color='green')
# plt.title('Box Plot of Volume')
# plt.tight_layout()
# plt.show()
    


## Financial Ratio Analysis

Calculate daily returns as the percentage change in the 'Close' price from one day to the next, then plot a histogram of those returns.
    

In [None]:

# NOTE(review): every statement in this cell is commented out, so running the
# cell does nothing. The statements reference a DataFrame named `data` that is
# not created in the visible notebook — load it before uncommenting.

# Calculate daily returns
# Adds a 'Daily Return' column to `data`: the row-over-row percentage change
# of 'Close', scaled by 100 so the values are in percent.
# NOTE(review): presumably rows are sorted in ascending date order; verify,
# since pct_change() compares each row with the one before it.
# data['Daily Return'] = data['Close'].pct_change() * 100

# Plotting the histogram of daily returns
# dropna() removes missing values before plotting; the first row has no
# previous close, so its return is NaN.
# plt.figure(figsize=(10, 6))
# sns.histplot(data['Daily Return'].dropna(), bins=50, kde=True, color='blue')
# plt.title('Histogram of Daily Returns')
# plt.xlabel('Daily Return (%)')
# plt.ylabel('Frequency')
# plt.show()
    


## Time Series Analysis

Plot line charts for 'Close' prices and 'Volume' to identify trends and patterns.
    

In [None]:

# NOTE(review): every statement in this cell is commented out, so running the
# cell does nothing. The statements reference a DataFrame named `data` that is
# not created in the visible notebook — load it before uncommenting.
# NOTE(review): a proper time axis presumably requires 'Date' to have been
# converted to datetime first; confirm the conversion has been run.

# Plotting 'Close' prices
# Top panel of a 2-row x 1-column figure.
# plt.figure(figsize=(14, 7))
# plt.subplot(2, 1, 1)
# plt.plot(data['Date'], data['Close'], label='Close Price', color='blue')
# plt.title('Time Series of Close Prices')
# plt.xlabel('Date')
# plt.ylabel('Close Price')
# plt.legend()

# Plotting 'Volume'
# Bottom panel of the same figure.
# plt.subplot(2, 1, 2)
# plt.plot(data['Date'], data['Volume'], label='Volume', color='green')
# plt.title('Time Series of Volume')
# plt.xlabel('Date')
# plt.ylabel('Volume')
# plt.legend()
# plt.tight_layout()
# plt.show()
    