In [1]:
import pandas as pd
import matplotlib.pyplot as plt

# 1. Load the Stock Market Dataset
df = pd.read_csv('stock_data.csv', parse_dates=['Date'])

# 2. Apply MultiIndexing for Companies & Dates
df.set_index(['Company', 'Date'], inplace=True)
df.sort_index(inplace=True)

# 3. Reshape Data Using melt()
df_reset = df.reset_index()
melted = pd.melt(df_reset, id_vars=['Company', 'Date'], value_vars=['Open Price', 'Close Price'],
                 var_name='Price Type', value_name='Price')

# 4. Use stack() and unstack() to Modify Indexing
stacked = df.stack()
unstacked = stacked.unstack(level=0)

# 5. Apply Rolling Window Functions (rolling())
rolling_avg = df.groupby(level=0)['Close Price'].rolling(7).mean().reset_index(level=0, drop=True)
df['7D_MA_Close'] = rolling_avg

# 6. Use Expanding Window Functions (expanding())
df['Expanding_Max_Close'] = df.groupby(level=0)['Close Price'].expanding().max().reset_index(level=0, drop=True)

# 7. Compare Vectorization vs Loops for Performance
# Vectorized
df['Pct_Change'] = df.groupby(level=0)['Close Price'].pct_change()

# Loop
df['Loop_Pct_Change'] = 0.0
for company in df.index.get_level_values(0).unique():
    prices = df.loc[company, 'Close Price']
    changes = prices.pct_change()
    df.loc[company, 'Loop_Pct_Change'] = changes

# 8. Optimize Data Processing with apply() instead of .iterrows()
def adjust_price(row):
    return row['Close Price'] * 0.98  # example adjustment factor

df['Adjusted_Close'] = df.reset_index().groupby('Company').apply(lambda x: x.apply(adjust_price, axis=1)['Close Price']).values

# 9. Compute Aggregations with groupby()
monthly_volume = df.reset_index().groupby([pd.Grouper(key='Date', freq='M'), 'Company'])['Volume'].sum()

# 10. Handle Missing Data Efficiently
df['Close Price'] = df['Close Price'].interpolate()
df.fillna(method='bfill', inplace=True)

# 11. Sort Data by Date and Company
df.sort_values(by=['Company', 'Date'], inplace=True)
df.sort_index(inplace=True)

# 12. Generate Visualizations with Pandas & Matplotlib
selected_companies = ['AAPL', 'GOOGL']
df_reset = df.reset_index()
for company in selected_companies:
    company_data = df_reset[df_reset['Company'] == company]
    plt.plot(company_data['Date'], company_data['Close Price'], label=company)

plt.legend()
plt.title('Stock Price Trends')
plt.xlabel('Date')
plt.ylabel('Close Price')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# 13. Save the Processed Data to a New File
df.to_csv('processed_stock_data.csv')



KeyError: 'Close Price'

In [None]:
!pip install pandas matplotlib






[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip
