In [1]:
import pandas as pd

# Location of the Samsung price history CSV in the Kaggle input directory.
DATA = '/kaggle/input/samsung-stock-price-dataset/Samsung Dataset.csv'

# Read the file with 'Date' parsed as datetimes and append a calendar-year
# column derived from it; later cells use 'year' to color the scatter plots.
df = (
    pd.read_csv(DATA, parse_dates=['Date'])
    .assign(year=lambda frame: frame['Date'].dt.year)
)
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,year
0,2000-01-04,6000.0,6110.0,5660.0,6110.0,4449.709961,74195000,2000
1,2000-01-05,5800.0,6060.0,5520.0,5580.0,4063.72876,74680000,2000
2,2000-01-06,5750.0,5780.0,5580.0,5620.0,4092.859863,54390000,2000
3,2000-01-07,5560.0,5670.0,5360.0,5540.0,4034.598877,40305000,2000
4,2000-01-10,5600.0,5770.0,5580.0,5770.0,4202.098145,46880000,2000


Let's first look at the price history. Because this is a successful company and we have a long series, the price spans a wide range, so we should view the data on a logarithmic scale.

In [2]:
from plotly import express

# One log-scale line chart per column selection, shown in order:
# the four OHLC prices, close vs. adjusted close, and traded volume.
for y_columns in (['Open', 'High', 'Low', 'Close'], ['Close', 'Adj Close'], 'Volume'):
    express.line(data_frame=df, x='Date', y=y_columns, log_y=True).show()

Our volume data looks pretty solid, but there are a few days with anomalously low volume.

In [3]:
# Daily volume against date on a log-scale y axis, colored by year,
# with a LOWESS trendline overlaid.
express.scatter(
    data_frame=df,
    x='Date',
    y='Volume',
    color='year',
    log_y=True,
    trendline='lowess',
)

Volume has declined slightly over time even though price has risen substantially.

In [4]:
# Daily adjusted close against date on a log-scale y axis, colored by year,
# with a LOWESS trendline overlaid.
express.scatter(
    data_frame=df,
    x='Date',
    y='Adj Close',
    color='year',
    log_y=True,
    trendline='lowess',
)

Our price trendline is very smooth even when we use daily adjusted close values. For comparison, the next plot uses the monthly mean of the adjusted close instead.

In [5]:
# Average the adjusted close over month-end ('ME') bins, then plot the
# monthly means on a log-scale y axis with a LOWESS trendline.
express.scatter(
    data_frame=(
        df[['Date', 'Adj Close']]
        .set_index('Date')
        .resample('ME')
        .mean()
        .reset_index()
    ),
    x='Date',
    y='Adj Close',
    trendline='lowess',
    log_y=True,
)

In [6]:
# Keep only rows whose volume exceeds 2,000,000, then plot adjusted close
# against volume on log-log axes, colored by year.
express.scatter(
    data_frame=df.loc[df['Volume'].gt(2000000)],
    x='Volume',
    y='Adj Close',
    color='year',
    log_x=True,
    log_y=True,
)

Here we have filtered out the low-volume outliers (days with a volume of 2,000,000 or less), and we get almost a layer cake of adjusted close prices by year: Samsung's stock price has marched ever higher even as trading volume has declined.