In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and echo the full path of every file found.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Source CSV; judging by the file name it holds minute-level ICICI Bank quotes from NSE.
ICICI_MINUTE_CSV = '/kaggle/input/stock-market-india/FullDataCsv/ICICIBANK__EQ__NSE__NSE__MINUTE.csv'
df = pd.read_csv(ICICI_MINUTE_CSV)

In [None]:
# Preview the first rows to check that the CSV was parsed as expected.
df.head()

In [None]:
# (rows, columns) of the raw frame.
df.shape

In [None]:
# Summary statistics for the raw frame's columns.
df.describe()

In [None]:
# List the column names to see which fields the CSV provides.
df.columns

In [None]:
# Summarise the timestamp column as loaded, before any dtype conversion.
df['timestamp'].describe()

In [None]:
# Parse the timestamp column into pandas datetimes; the resampling step below
# relies on it being a datetime column.
parsed_timestamps = pd.to_datetime(df['timestamp'])
df['timestamp'] = parsed_timestamps

In [None]:
# Summarise the timestamp column again, now that it has been converted with pd.to_datetime.
df['timestamp'].describe()

## Time series visualization

In [None]:
# Collapse the minute bars into one OHLCV row per calendar day.
daily_data = df.resample('D', on='timestamp').agg({
    'open': 'first',  # First minute's open price
    'high': 'max',    # Maximum price during the day
    'low': 'min',     # Minimum price during the day
    'close': 'last',  # Last minute's close price
    'volume': 'sum'   # Daily volume (sum of per-minute volumes)
})

# resample('D') emits a row for *every* calendar day in the range, so days without
# any trades (weekends, market holidays) show up with NaN prices and zero volume.
# Those rows are artefacts of the resampling, not observations: they skew
# describe(), the volume plots and the return series, so keep trading days only.
daily_data = daily_data.dropna(subset=['close'])

# Close-to-close simple return between consecutive trading days
# (the first row has no predecessor and is NaN).
daily_data['daily_returns'] = daily_data['close'].pct_change()
# Intraday high-low range, used here as a simple per-day volatility proxy.
daily_data['daily_volatility'] = daily_data['high'] - daily_data['low']

In [None]:
# Summary statistics for the daily OHLCV frame and its derived columns.
daily_data.describe()

In [None]:
# (rows, columns) of the daily frame; one row per resampled day.
daily_data.shape

In [None]:
# Daily close drawn as a thin marked line with the area under the curve shaded.
close_fig, close_ax = plt.subplots(figsize=(10, 6))
dates, closes = daily_data.index, daily_data['close']
close_ax.plot(dates, closes, color='b', marker='o', linestyle='-', linewidth=1, markersize=2)
close_ax.fill_between(dates, closes, color='skyblue', alpha=0.6)
close_ax.set_title('ICICIBANK_DAILY')
close_ax.set_xlabel('Date')
close_ax.set_ylabel('Closing Price')
close_ax.grid(True)
plt.show()

In [None]:
# Day-over-day percentage change of the close, one marker per day.
returns_fig, returns_ax = plt.subplots(figsize=(10, 6))
returns_ax.plot(
    daily_data.index,
    daily_data['daily_returns'],
    marker='o',
    linestyle='-',
    color='g',
)
returns_ax.set_title('Daily Returns')
returns_ax.set_xlabel('Date')
returns_ax.set_ylabel('Daily Return')
returns_ax.grid(True)
plt.show()

In [None]:
# Summed per-minute volume for each day, drawn as a marked line.
volume_fig, volume_ax = plt.subplots(figsize=(10, 6))
volume_ax.plot(
    daily_data.index,
    daily_data['volume'],
    marker='o',
    linestyle='-',
    color='y',
)
volume_ax.set_title('Daily Trading Volume')
volume_ax.set_xlabel('Date')
volume_ax.set_ylabel('Daily Volume')
volume_ax.grid(True)
plt.show()

## Descriptive Statistics

In [None]:
# Central tendency and spread of the daily closing price.
close_series = daily_data['close']
mean_close = close_series.mean()
median_close = close_series.median()
std_close = close_series.std()

# Quartiles (25th, 50th and 75th percentiles) of the close.
percentiles = close_series.quantile([0.25, 0.5, 0.75])

# One print call, one item per line: same output as separate print statements.
print(
    f"Mean Close Price: {mean_close:.2f}",
    f"Median Close Price: {median_close:.2f}",
    f"Standard Deviation of Close Price: {std_close:.2f}",
    "Percentiles:",
    percentiles,
    sep="\n",
)

In [None]:
# Bar chart of the mean, median and standard deviation of the daily close.
stat_pairs = [('Mean', mean_close), ('Median', median_close), ('Std Dev', std_close)]
labels = [stat_name for stat_name, _stat_value in stat_pairs]
summary_stats = [stat_value for _stat_name, stat_value in stat_pairs]

stats_fig, stats_ax = plt.subplots(figsize=(8, 5))
stats_ax.bar(labels, summary_stats, color=['blue', 'green', 'red'])
stats_ax.set_title('Summary Statistics for Daily Closing Prices')
stats_ax.set_ylabel('Value')
plt.show()

Identifying outliers or extreme values in a dataset, including daily stock closing prices, is crucial for data analysis. Outliers can significantly impact statistical measures and distort the analysis. 

There are various methods to identify outliers. One common method is the use of the Interquartile Range (IQR) method.

1. We calculate the Interquartile Range (IQR) of the daily closing prices, which is the third quartile (Q3) minus the first quartile (Q1): IQR = Q3 − Q1.
2. We define a lower bound (Q1 − 1.5 × IQR) and an upper bound (Q3 + 1.5 × IQR) to identify potential outliers. Data points that fall below the lower bound or above the upper bound are considered potential outliers.
3. We use boolean indexing to identify and print the outliers.
4. Adjust the value 1.5 in the lower and upper bound definitions to control the sensitivity to outliers. A higher value makes it less sensitive, while a lower value makes it more sensitive.

In [None]:
# Quartiles of the daily close and the spread between them.
close_quartiles = daily_data['close'].quantile([0.25, 0.75])
Q1 = close_quartiles.loc[0.25]
Q3 = close_quartiles.loc[0.75]
IQR = Q3 - Q1

# Fences placed 1.5 * IQR outside the quartiles.
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

# A day is flagged when its close lies strictly outside the fences.
# NaN closes compare False on both sides and are therefore never flagged.
below_fence = daily_data['close'].lt(lower_bound)
above_fence = daily_data['close'].gt(upper_bound)
outliers = daily_data.loc[below_fence | above_fence]

# Show the flagged days.
print("Outliers:")
print(outliers)

## Visualization

### Line plot for daily closing prices

In [None]:
# Plain line plot of the daily close, one marker per day.
line_fig, line_ax = plt.subplots(figsize=(10, 6))
line_ax.plot(
    daily_data.index,
    daily_data['close'],
    marker='o',
    linestyle='-',
    color='b',
    markersize=4,
)
line_ax.set_title('Daily Closing Prices')
line_ax.set_xlabel('Date')
line_ax.set_ylabel('Closing Price')
line_ax.grid(True)
plt.show()

### Candlestick chart

In [None]:
import plotly.graph_objects as go

# Build the daily OHLC trace first, then wrap it in a figure.
daily_candles = go.Candlestick(
    x=daily_data.index,
    open=daily_data['open'],
    high=daily_data['high'],
    low=daily_data['low'],
    close=daily_data['close'],
)
fig = go.Figure(data=[daily_candles])

fig.show()


### Bar plot for daily trading volume

In [None]:
# Bar chart of daily traded volume, expressed in crore (1 crore = 10,000,000).
# The axis label promises crore, so scale the raw volume explicitly rather than
# relying on matplotlib's automatic axis offset happening to match.
CRORE = 1e7
volume_in_crore = daily_data['volume'] / CRORE

plt.figure(figsize=(10, 6))
plt.bar(daily_data.index, volume_in_crore, color='b')
plt.title('Daily Trading Volume')
plt.xlabel('Date')
plt.ylabel('Volume in crore')
plt.grid(True)
plt.show()

### Moving averages

In [None]:
# 50- and 200-day moving averages of the close.
#
# `df` holds one row per minute, so rolling(window=50) directly on df['close'] would
# average the last 50 *minutes*, not 50 days. Instead: take one close per trading day,
# roll over those daily closes, then broadcast each day's value back onto that day's
# minute rows so the existing column names keep their meaning.
#
# Requires df['timestamp'] to be a datetime column (converted earlier with pd.to_datetime).
# Assumes rows are in chronological order within each day, so 'last' is the final
# bar of the day. TODO confirm against the source CSV.
# Caveat: a day's average includes that same day's close, so intraday rows carry
# end-of-day information. Fine for plotting, but not for backtesting signals.
trading_day = df['timestamp'].dt.normalize()
daily_close = df.groupby(trading_day)['close'].last()  # only days that actually have rows

df['50-Day MA'] = trading_day.map(daily_close.rolling(window=50).mean())
df['200-Day MA'] = trading_day.map(daily_close.rolling(window=200).mean())

In [None]:
# Inspect the last rows, which now include the two moving-average columns.
df.tail()

In [None]:
# Closing price with its two moving averages overlaid.
# Plot against the actual timestamps: df.index is only the CSV row number, which
# would contradict the 'Date' label on the x-axis.
plot_times = df['timestamp']

plt.figure(figsize=(10, 6))
plt.plot(plot_times, df['close'], label='Closing Price', color='b')
plt.plot(plot_times, df['50-Day MA'], label='50-Day MA', color='g')
plt.plot(plot_times, df['200-Day MA'], label='200-Day MA', color='r')

plt.title('Stock Price with Moving Averages')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.grid(True)
plt.show()