### Important DataFrame Functions

In [3]:
# astype


Collecting pandas
  Downloading pandas-2.2.2-cp312-cp312-macosx_11_0_arm64.whl.metadata (19 kB)
Collecting numpy>=1.26.0 (from pandas)
  Downloading numpy-2.0.0-cp312-cp312-macosx_14_0_arm64.whl.metadata (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.9/60.9 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
Collecting tzdata>=2022.7 (from pandas)
  Downloading tzdata-2024.1-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pandas-2.2.2-cp312-cp312-macosx_11_0_arm64.whl (11.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.3/11.3 MB[0m [31m44.7 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading numpy-2.0.0-cp312-cp312-macosx_14_0_arm64.whl (5.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.0/5.0 MB[0m [31m52.0 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading tzdata-2024.1-py2.py3-none-any.whl (345 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m345.4/345.4 kB[0m [31

In [7]:
# value_counts

1. **Identify all the top-performing stocks based on their overall return from the start to the end of the dataset.**
   - **Hint:** Calculate the return as (final close price - initial close price) / initial close price.

2. **How many instances of a daily price change greater than 5% occurred for each stock?**
   - **Hint:** Use the `pct_change()` method on the 'Close' price to calculate daily percentage changes.

3. **Calculate the number of times each stock's price increased within a specific date range (e.g., the first quarter of each year).**
   - **Hint:** Filter the dataset by date and then compare 'Close' and 'Open' prices.

4. **Calculate the percentage of days on which an opening price higher than the previous day's closing price was followed by a closing price higher than that day's opening price.**
   - **Hint:** Use the `shift()` method to compare the opening price with the previous day's closing price.

5. **Identify all assets with a daily percentage change higher than 8% and a volume greater than 100,000.**
   - **Hint:** Apply conditions on 'Daily Change' and 'Volume' columns.

6. **Identify all stocks in the Technology sector with a daily percentage change higher than 7.5%.**
   - **Hint:** Merge the datasets on 'Ticker' and filter based on the sector and daily change.

7. **Write a function that can return the performance track record of two assets against each other over time.**
   - **Hint:** Create a function that merges the close prices of two tickers on the 'Date' column.

8. **Identify and remove duplicate rows from both datasets. How many duplicates were removed?**
   - **Hint:** Use the `duplicated()` method to find and remove duplicate rows.

9. **For the fundamental dataset, identify rows where the Market Cap is NaN, and impute these values using a sector-wise median Market Cap.**
   - **Hint:** Group by sector and fill NaN values with the sector median.

10. **Filter the combined dataset to find the top 5 most volatile stocks based on the standard deviation of daily percentage changes.**
    - **Hint:** Calculate the standard deviation of daily percentage changes for each stock and sort the results.

In [4]:
import pandas as pd
import numpy as np

In [5]:
import pandas as pd
import numpy as np

# Function to create a synthetic dataset for a given asset type
def create_asset_data(tickers, start_date, end_date, seed=42):
    """Build a synthetic daily OHLCV table for a list of tickers.

    For every ticker a random-walk base price (starting near 100) is generated
    over the business days between ``start_date`` and ``end_date``; Open, High,
    Low, Close and Volume are derived from it, and roughly 5% of the values in
    each numeric column are then replaced with NaN.

    Parameters
    ----------
    tickers : iterable of str
        Ticker symbols; each one contributes one row per business day.
    start_date, end_date : str or datetime-like
        Bounds passed to ``pd.date_range(..., freq='B')``.
    seed : int, default 42
        Non-negative base seed. The generator for each ticker is seeded from
        ``seed`` combined with the ticker's UTF-8 bytes, so output is
        reproducible while different tickers get different price paths.

    Returns
    -------
    pandas.DataFrame
        Columns ``Date, Ticker, Open, High, Low, Close, Volume`` with a fresh
        0..n-1 index. ``Volume`` is float because it may hold NaN. An empty
        frame with the same columns is returned when ``tickers`` is empty.
    """
    columns = ['Date', 'Ticker', 'Open', 'High', 'Low', 'Close', 'Volume']
    dates = pd.date_range(start=start_date, end=end_date, freq='B')
    n_days = len(dates)

    frames = []
    for ticker in tickers:
        # Re-seeding with the same constant for every ticker would replay one
        # identical series for all of them; mixing the ticker into the seed
        # keeps the data reproducible but distinct per asset.
        rng = np.random.default_rng([seed, *str(ticker).encode('utf-8')])

        base_price = rng.standard_normal(n_days).cumsum() + 100
        open_price = base_price + rng.uniform(-1, 1, n_days)
        close_price = base_price + rng.uniform(-1, 1, n_days)
        # High/Low must bracket both Open and Close to be a valid OHLC bar.
        high_price = np.maximum.reduce(
            [open_price, close_price, base_price + rng.uniform(0, 2, n_days)]
        )
        low_price = np.minimum.reduce(
            [open_price, close_price, base_price - rng.uniform(0, 2, n_days)]
        )
        volume = rng.integers(1000, 1000000, n_days).astype(float)

        frame = pd.DataFrame({
            'Date': dates,
            'Ticker': ticker,
            'Open': open_price,
            'High': high_price,
            'Low': low_price,
            'Close': close_price,
            'Volume': volume,
        }, columns=columns)

        # Introduce NaN values randomly: each numeric column independently
        # loses about 5% of its entries.
        for column in columns[2:]:
            frame.loc[rng.random(n_days) < 0.05, column] = np.nan

        frames.append(frame)

    if not frames:
        # pd.concat refuses an empty list; keep the documented schema instead.
        return pd.DataFrame(columns=columns)
    return pd.concat(frames, ignore_index=True)

# Ticker universe for each asset class
stock_tickers = ['AAPL', 'GOOG', 'MSFT', 'AMZN', 'TSLA']
forex_tickers = ['EUR/USD', 'GBP/USD', 'USD/JPY', 'AUD/USD', 'USD/CAD']
crypto_tickers = ['BTC/USD', 'ETH/USD', 'XRP/USD', 'LTC/USD', 'BCH/USD']

# Synthesize price history for every asset class over the same date window
# (generated in the order stocks, forex, crypto)
stock_data, forex_data, crypto_data = (
    create_asset_data(ticker_group, '2015-01-01', '2023-01-01')
    for ticker_group in (stock_tickers, forex_tickers, crypto_tickers)
)

# Stack the three asset classes into one long DataFrame
asset_frames = [stock_data, forex_data, crypto_data]
combined_data = pd.concat(asset_frames)

# Re-append a reproducible 5% sample of the rows so the data contains exact duplicates
duplicate_rows = combined_data.sample(frac=0.05, random_state=42)
combined_data = pd.concat([combined_data, duplicate_rows])

# Persist to CSV without the index column
combined_data.to_csv('algotrading_combined_dataset.csv', index=False)

print("Dataset created and saved to 'algotrading_combined_dataset.csv'")


Dataset created and saved to 'algotrading_combined_dataset.csv'


In [7]:
import pandas as pd
import numpy as np

# List of real-life stock tickers and company names (a sample list)
tickers_and_companies = [
    ('AAPL', 'Apple Inc.'),
    ('GOOGL', 'Alphabet Inc.'),
    ('MSFT', 'Microsoft Corporation'),
    ('AMZN', 'Amazon.com, Inc.'),
    ('TSLA', 'Tesla, Inc.'),
    ('FB', 'Meta Platforms, Inc.'),
    ('BRK.B', 'Berkshire Hathaway Inc.'),
    ('JNJ', 'Johnson & Johnson'),
    ('JPM', 'JPMorgan Chase & Co.'),
    ('V', 'Visa Inc.'),
    # Add more real-life tickers and company names here as needed
]

# Pad the list to 1000 companies with uniquely named synthetic placeholders.
# Repeating the real entries instead would give the same ticker many rows with
# conflicting sectors, so 'Ticker' could not be used as a join key later on.
TARGET_COMPANY_COUNT = 1000
tickers_and_companies.extend(
    (f'SYN{n:04d}', f'Synthetic Company {n:04d}')
    for n in range(len(tickers_and_companies) + 1, TARGET_COMPANY_COUNT + 1)
)

# Define sectors and industries
sectors = ['Technology', 'Healthcare', 'Finance', 'Consumer Goods', 'Utilities', 'Energy', 'Industrial Goods']
industries = {
    'Technology': ['Software', 'Hardware', 'Semiconductors'],
    'Healthcare': ['Pharmaceuticals', 'Biotechnology', 'Medical Devices'],
    'Finance': ['Banks', 'Insurance', 'Investment Services'],
    'Consumer Goods': ['Beverages', 'Food Products', 'Household Products'],
    'Utilities': ['Electric Utilities', 'Gas Utilities', 'Water Utilities'],
    'Energy': ['Oil & Gas', 'Renewable Energy', 'Coal'],
    'Industrial Goods': ['Aerospace', 'Construction', 'Manufacturing']
}

# Seeded generator so re-running the cell reproduces the same dataset
rng = np.random.default_rng(42)

# Generate random data for each ticker
data = []
for ticker, company_name in tickers_and_companies:
    sector = str(rng.choice(sectors))
    industry = str(rng.choice(industries[sector]))
    metrics = [
        round(rng.uniform(0.1, 500), 2),  # market cap, in billions
        round(rng.uniform(-10, 10), 2),   # EPS
        round(rng.uniform(5, 50), 2),     # P/E ratio
        round(rng.uniform(0, 10), 2),     # dividend yield (%)
        round(rng.uniform(0.1, 20), 2),   # P/B ratio
        round(rng.uniform(0, 3), 2),      # D/E ratio
    ]

    # Introduce NaN values randomly: each metric is independently blanked
    # with probability 0.1
    metrics = [np.nan if rng.random() < 0.1 else value for value in metrics]

    data.append([ticker, company_name, sector, industry, *metrics])

# Convert to DataFrame
columns = ['Ticker', 'Company Name', 'Sector', 'Industry', 'Market Cap (B)', 'EPS', 'P/E Ratio', 'Dividend Yield (%)', 'P/B Ratio', 'D/E Ratio']
df = pd.DataFrame(data, columns=columns)

# Introduce duplicate rows
df = pd.concat([df, df.sample(frac=0.05, random_state=42)])  # Add 5% duplicates

# Save to CSV
df.to_csv('fundamental_research_dataset_large.csv', index=False)

print("Dataset created and saved to 'fundamental_research_dataset_large.csv'")

Dataset created and saved to 'fundamental_research_dataset_large.csv'




### Dataset 1: Fundamental Research Dataset

This dataset contains fundamental data for real-life companies, with some rows containing NaN values and duplicates.

```python
import pandas as pd
import numpy as np

# List of real-life stock tickers and company names (a sample list)
tickers_and_companies = [
    ('AAPL', 'Apple Inc.'),
    ('GOOGL', 'Alphabet Inc.'),
    ('MSFT', 'Microsoft Corporation'),
    ('AMZN', 'Amazon.com, Inc.'),
    ('TSLA', 'Tesla, Inc.'),
    ('FB', 'Meta Platforms, Inc.'),
    ('BRK.B', 'Berkshire Hathaway Inc.'),
    ('JNJ', 'Johnson & Johnson'),
    ('JPM', 'JPMorgan Chase & Co.'),
    ('V', 'Visa Inc.'),
    # Add more real-life tickers and company names here as needed
]

# Pad the list to 1000 companies with uniquely named synthetic placeholders.
# Repeating the real entries instead would give the same ticker many rows with
# conflicting sectors, so 'Ticker' could not be used as a join key later on.
TARGET_COMPANY_COUNT = 1000
tickers_and_companies.extend(
    (f'SYN{n:04d}', f'Synthetic Company {n:04d}')
    for n in range(len(tickers_and_companies) + 1, TARGET_COMPANY_COUNT + 1)
)

# Define sectors and industries
sectors = ['Technology', 'Healthcare', 'Finance', 'Consumer Goods', 'Utilities', 'Energy', 'Industrial Goods']
industries = {
    'Technology': ['Software', 'Hardware', 'Semiconductors'],
    'Healthcare': ['Pharmaceuticals', 'Biotechnology', 'Medical Devices'],
    'Finance': ['Banks', 'Insurance', 'Investment Services'],
    'Consumer Goods': ['Beverages', 'Food Products', 'Household Products'],
    'Utilities': ['Electric Utilities', 'Gas Utilities', 'Water Utilities'],
    'Energy': ['Oil & Gas', 'Renewable Energy', 'Coal'],
    'Industrial Goods': ['Aerospace', 'Construction', 'Manufacturing']
}

# Seeded generator so re-running the script reproduces the same dataset
rng = np.random.default_rng(42)

# Generate random data for each ticker
data = []
for ticker, company_name in tickers_and_companies:
    sector = str(rng.choice(sectors))
    industry = str(rng.choice(industries[sector]))
    metrics = [
        round(rng.uniform(0.1, 500), 2),  # market cap, in billions
        round(rng.uniform(-10, 10), 2),   # EPS
        round(rng.uniform(5, 50), 2),     # P/E ratio
        round(rng.uniform(0, 10), 2),     # dividend yield (%)
        round(rng.uniform(0.1, 20), 2),   # P/B ratio
        round(rng.uniform(0, 3), 2),      # D/E ratio
    ]

    # Introduce NaN values randomly: each metric is independently blanked
    # with probability 0.1
    metrics = [np.nan if rng.random() < 0.1 else value for value in metrics]

    data.append([ticker, company_name, sector, industry, *metrics])

# Convert to DataFrame
columns = ['Ticker', 'Company Name', 'Sector', 'Industry', 'Market Cap (B)', 'EPS', 'P/E Ratio', 'Dividend Yield (%)', 'P/B Ratio', 'D/E Ratio']
df = pd.DataFrame(data, columns=columns)

# Introduce duplicate rows
df = pd.concat([df, df.sample(frac=0.05, random_state=42)])  # Add 5% duplicates

# Save to CSV
df.to_csv('fundamental_research_dataset_large.csv', index=False)

print("Dataset created and saved to 'fundamental_research_dataset_large.csv'")
```

### Dataset 2: Combined Asset Data

This dataset includes stock, Forex, and cryptocurrency data with NaN values and duplicates.

```python
import pandas as pd
import numpy as np

# Function to create a synthetic dataset for a given asset type
def create_asset_data(tickers, start_date, end_date, seed=42):
    """Build a synthetic daily OHLCV table for a list of tickers.

    For every ticker a random-walk base price (starting near 100) is generated
    over the business days between ``start_date`` and ``end_date``; Open, High,
    Low, Close and Volume are derived from it, and roughly 5% of the values in
    each numeric column are then replaced with NaN.

    Parameters
    ----------
    tickers : iterable of str
        Ticker symbols; each one contributes one row per business day.
    start_date, end_date : str or datetime-like
        Bounds passed to ``pd.date_range(..., freq='B')``.
    seed : int, default 42
        Non-negative base seed. The generator for each ticker is seeded from
        ``seed`` combined with the ticker's UTF-8 bytes, so output is
        reproducible while different tickers get different price paths.

    Returns
    -------
    pandas.DataFrame
        Columns ``Date, Ticker, Open, High, Low, Close, Volume`` with a fresh
        0..n-1 index. ``Volume`` is float because it may hold NaN. An empty
        frame with the same columns is returned when ``tickers`` is empty.
    """
    columns = ['Date', 'Ticker', 'Open', 'High', 'Low', 'Close', 'Volume']
    dates = pd.date_range(start=start_date, end=end_date, freq='B')
    n_days = len(dates)

    frames = []
    for ticker in tickers:
        # Re-seeding with the same constant for every ticker would replay one
        # identical series for all of them; mixing the ticker into the seed
        # keeps the data reproducible but distinct per asset.
        rng = np.random.default_rng([seed, *str(ticker).encode('utf-8')])

        base_price = rng.standard_normal(n_days).cumsum() + 100
        open_price = base_price + rng.uniform(-1, 1, n_days)
        close_price = base_price + rng.uniform(-1, 1, n_days)
        # High/Low must bracket both Open and Close to be a valid OHLC bar.
        high_price = np.maximum.reduce(
            [open_price, close_price, base_price + rng.uniform(0, 2, n_days)]
        )
        low_price = np.minimum.reduce(
            [open_price, close_price, base_price - rng.uniform(0, 2, n_days)]
        )
        volume = rng.integers(1000, 1000000, n_days).astype(float)

        frame = pd.DataFrame({
            'Date': dates,
            'Ticker': ticker,
            'Open': open_price,
            'High': high_price,
            'Low': low_price,
            'Close': close_price,
            'Volume': volume,
        }, columns=columns)

        # Introduce NaN values randomly: each numeric column independently
        # loses about 5% of its entries.
        for column in columns[2:]:
            frame.loc[rng.random(n_days) < 0.05, column] = np.nan

        frames.append(frame)

    if not frames:
        # pd.concat refuses an empty list; keep the documented schema instead.
        return pd.DataFrame(columns=columns)
    return pd.concat(frames, ignore_index=True)

# Ticker universe for each asset class
stock_tickers = ['AAPL', 'GOOG', 'MSFT', 'AMZN', 'TSLA']
forex_tickers = ['EUR/USD', 'GBP/USD', 'USD/JPY', 'AUD/USD', 'USD/CAD']
crypto_tickers = ['BTC/USD', 'ETH/USD', 'XRP/USD', 'LTC/USD', 'BCH/USD']

# Synthesize price history for every asset class over the same date window
# (generated in the order stocks, forex, crypto)
stock_data, forex_data, crypto_data = (
    create_asset_data(ticker_group, '2015-01-01', '2023-01-01')
    for ticker_group in (stock_tickers, forex_tickers, crypto_tickers)
)

# Stack the three asset classes into one long DataFrame
asset_frames = [stock_data, forex_data, crypto_data]
combined_data = pd.concat(asset_frames)

# Re-append a reproducible 5% sample of the rows so the data contains exact duplicates
duplicate_rows = combined_data.sample(frac=0.05, random_state=42)
combined_data = pd.concat([combined_data, duplicate_rows])

# Persist to CSV without the index column
combined_data.to_csv('algotrading_combined_dataset.csv', index=False)

print("Dataset created and saved to 'algotrading_combined_dataset.csv'")
```

### Practice Questions

1. **Data Cleaning**:
   - Identify and remove duplicate rows from both datasets.
   - Handle NaN values appropriately (e.g., fill with mean/median, forward fill, backward fill, or remove).

2. **Data Analysis**:
   - Calculate the average Market Cap, P/E Ratio, and Dividend Yield for each sector in the fundamental research dataset.
   - Calculate the daily percentage change for each asset in the combined dataset.

3. **Data Visualization**:
   - Plot the distribution of Market Cap values for different sectors.
   - Visualize the price trends for a selected stock, Forex pair, and cryptocurrency over time.

4. **Advanced Analysis**:
   - Perform a sector-wise analysis to find which sectors have the highest and lowest average EPS in the fundamental research dataset.
   - Analyze the correlation between different Forex pairs in the combined dataset.

5. **Algorithmic Trading**:
   - Implement a simple moving average crossover strategy using the combined dataset.
   - Develop a fundamental analysis-based scoring system to rank stocks in the fundamental research dataset based on their financial metrics.

These questions and tasks should help you get hands-on experience with real-life algorithmic trading scenarios and data handling techniques.



### 1. Identify all the top-performing stocks based on their overall return from the start to the end of the dataset.

```python
import pandas as pd
import matplotlib.pyplot as plt

# Load the combined dataset (left unmodified here: it still contains the
# duplicate rows and NaN values that later questions ask about)
combined_df = pd.read_csv('algotrading_combined_dataset.csv')

# Work on a cleaned, chronologically ordered view. In file order the last rows
# are randomly sampled duplicates, so taking iloc[0]/iloc[-1] directly would
# not give the first and last trading day; NaN closes would give NaN returns.
price_history = (
    combined_df
    .drop_duplicates(subset=['Ticker', 'Date'])
    .dropna(subset=['Close'])
    # Dates are still strings at this point; ISO 'YYYY-MM-DD' text sorts chronologically
    .sort_values(['Ticker', 'Date'])
)

# Calculate overall return for each asset:
# (final close - initial close) / initial close
closes_by_ticker = price_history.groupby('Ticker', sort=False)['Close']
initial_close = closes_by_ticker.first()
final_close = closes_by_ticker.last()

returns_df = (
    ((final_close - initial_close) / initial_close)
    .rename('Overall Return')
    .reset_index()
    .sort_values('Overall Return', ascending=False)
)

# Plot the top 10 performing stocks
top_10_performers = returns_df.head(10).set_index('Ticker')
top_10_performers.plot(kind='bar')
plt.title('Top 10 Performing Stocks')
plt.xlabel('Ticker')
plt.ylabel('Overall Return')
plt.show()

print(f"Top-performing stocks:\n{returns_df}")
```

### 2. How many instances of daily price change greater than 5% occurred for each stock?

```python
# Calculate daily percentage change per ticker, in date order.
# A plain pct_change() over the whole frame would compare a row with whatever
# row precedes it in the file (another ticker, or a shuffled duplicate).
# Each (Ticker, Date) session is used once; duplicate rows are left as NaN.
# A change is NaN when either that day's or the previous day's close is missing.
ordered_sessions = (
    combined_df
    .drop_duplicates(subset=['Ticker', 'Date'])
    .sort_values(['Ticker', 'Date'])
)
previous_close = ordered_sessions.groupby('Ticker')['Close'].shift(1)
# Assignment aligns on the row index, so combined_df keeps its current row order
combined_df['Daily Change'] = ordered_sessions['Close'] / previous_close - 1

# Calculate instances with daily percentage change greater than 5%
combined_df['High Change'] = combined_df['Daily Change'] > 0.05

# Count occurrences for each ticker (tickers kept in order of first appearance)
tickers = combined_df['Ticker'].unique()
high_change_by_ticker = combined_df.groupby('Ticker', sort=False)['High Change'].sum()
occurrences = {ticker: int(count) for ticker, count in high_change_by_ticker.items()}

# Plot the top 10 tickers with most high changes
high_change_counts = pd.Series(occurrences).sort_values(ascending=False).head(10)
high_change_counts.plot(kind='bar')
plt.title('Top 10 Tickers with Daily Price Change > 5%')
plt.xlabel('Ticker')
plt.ylabel('Count of High Changes')
plt.show()

print(f"Number of instances with daily price change greater than 5%:\n{occurrences}")
```

### 3. Calculate the number of times each stock's price increased in the first quarter.

```python
# Filter for the first quarter (January-March of every year)
combined_df['Date'] = pd.to_datetime(combined_df['Date'])
first_quarter = (
    combined_df[combined_df['Date'].dt.month.isin([1, 2, 3])]
    # Count each (Ticker, Date) session once; the dataset contains repeated rows
    .drop_duplicates(subset=['Ticker', 'Date'])
    # Explicit copy so the new column is added to an independent frame,
    # not to a view of combined_df (avoids SettingWithCopyWarning)
    .copy()
)

# Count the number of times the stock's price increased (Close above Open).
# Rows with a missing Open or Close compare as False and are not counted.
first_quarter['Price Increase'] = first_quarter['Close'] > first_quarter['Open']
increase_counts = (
    first_quarter.groupby('Ticker')['Price Increase'].sum()
    # Report every ticker in the dataset, with 0 for any that has no Q1 rows
    .reindex(combined_df['Ticker'].unique(), fill_value=0)
)
price_increases = {ticker: int(count) for ticker, count in increase_counts.items()}

# Plot the top 10 stocks with most price increases in the first quarter
price_increases_counts = pd.Series(price_increases).sort_values(ascending=False).head(10)
price_increases_counts.plot(kind='bar')
plt.title('Top 10 Stocks with Price Increases in Q1')
plt.xlabel('Ticker')
plt.ylabel('Count of Price Increases')
plt.show()

print(f"Number of price increases in the first quarter:\n{price_increases}")
```

### 4. Calculate the percentage of times the opening price being higher than the previous day's closing price resulted in a higher closing price on that day.

```python
# Sort the DataFrame by Ticker and Date
combined_df = combined_df.sort_values(['Ticker', 'Date'])

# Previous session's close, computed per ticker so the first row of one ticker
# never inherits the last close of another. Only the first occurrence of each
# (Ticker, Date) takes part; repeated rows get NaN and drop out of the statistic
# instead of being compared with their own twin.
first_occurrence = ~combined_df.duplicated(subset=['Ticker', 'Date'])
combined_df['Previous Close'] = (
    combined_df[first_occurrence].groupby('Ticker')['Close'].shift(1)
)

# Only rows where all three prices are known can be judged either way
comparable = combined_df[['Open', 'Close', 'Previous Close']].notna().all(axis=1)

# Calculate if opening price is higher than the previous day's closing price
combined_df['Open Higher'] = comparable & (combined_df['Open'] > combined_df['Previous Close'])

# Calculate if the closing price is higher than the opening price
combined_df['Close Higher'] = combined_df['Close'] > combined_df['Open']

# Calculate percentage of open higher being close higher
open_close_higher = (combined_df['Open Higher'] & combined_df['Close Higher']).sum()
total_open_higher = combined_df['Open Higher'].sum()
if total_open_higher:
    percentage_open_close_higher = (open_close_higher / total_open_higher) * 100
else:
    # No qualifying day at all: report NaN rather than dividing by zero
    percentage_open_close_higher = float('nan')

print(f"Percentage of open higher being close higher: {percentage_open_close_higher:.2f}%")
```

### 5. Identify all assets with a daily percentage change higher than 8% and a volume greater than 100,000.

```python
# Filter for daily percentage change > 8% and volume > 100,000.
# Relies on the 'Daily Change' column added to combined_df in question 2.
# Each (Ticker, Date) session is considered once so repeated rows in the
# dataset are not counted twice.
sessions = combined_df.drop_duplicates(subset=['Ticker', 'Date'])
high_change_high_volume = sessions[(sessions['Daily Change'] > 0.08) & (sessions['Volume'] > 100000)]

# Plot the count of such instances for each ticker
high_change_high_volume_counts = high_change_high_volume['Ticker'].value_counts().head(10)
if high_change_high_volume_counts.empty:
    # A bar plot of an empty selection cannot be drawn; say so instead of failing
    print("No rows matched the filter; skipping the plot.")
else:
    high_change_high_volume_counts.plot(kind='bar')
    plt.title('Top 10 Tickers with Daily Change > 8% and Volume > 100,000')
    plt.xlabel('Ticker')
    plt.ylabel('Count')
    plt.show()

print(f"Assets with daily percentage change > 8% and volume > 100,000:\n{high_change_high_volume_counts}")
```

### 6. Identify all stocks in the Technology sector with a daily percentage change higher than 7.5%.

```python
# Load the fundamental dataset (kept as loaded: later questions need its
# duplicate rows and NaN values)
fundamental_df = pd.read_csv('fundamental_research_dataset_large.csv')

# Build a one-row-per-ticker sector lookup. The fundamental file can list the
# same ticker more than once; merging against it directly would multiply every
# price row by the number of matching fundamental rows.
sector_lookup = fundamental_df[['Ticker', 'Sector']].drop_duplicates(subset='Ticker')

# Merge with combined dataset to get sector information. Price rows are
# de-duplicated per session first; tickers without fundamentals are dropped
# by the inner join.
combined_with_sector = (
    combined_df
    .drop_duplicates(subset=['Ticker', 'Date'])
    .merge(sector_lookup, on='Ticker', how='inner', validate='many_to_one')
)

# Filter for Technology sector and daily percentage change > 7.5%
# ('Daily Change' is the column added to combined_df in question 2)
tech_high_change = combined_with_sector[(combined_with_sector['Sector'] == 'Technology') & (combined_with_sector['Daily Change'] > 0.075)]

# Plot the count of such instances for each ticker in Technology sector
tech_high_change_counts = tech_high_change['Ticker'].value_counts().head(10)
if tech_high_change_counts.empty:
    # A bar plot of an empty selection cannot be drawn; say so instead of failing
    print("No rows matched the filter; skipping the plot.")
else:
    tech_high_change_counts.plot(kind='bar')
    plt.title('Top 10 Technology Stocks with Daily Change > 7.5%')
    plt.xlabel('Ticker')
    plt.ylabel('Count')
    plt.show()

print(f"Technology stocks with daily percentage change > 7.5%:\n{tech_high_change_counts}")
```

### 7. Write a function that can return the performance track record of two assets against each other over time.

```python
def track_record(asset1, asset2, combined_df):
    """Return the closing prices of two assets side by side, indexed by date.

    Parameters
    ----------
    asset1, asset2 : str
        Values to look up in the 'Ticker' column.
    combined_df : pandas.DataFrame
        Must contain 'Ticker', 'Date' and 'Close' columns.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'Date' in ascending order, with one column per asset (named
        after the ticker) holding its 'Close'. Only dates present for both
        assets are kept; the frame is empty if either ticker is absent.
    """
    def _closes(ticker):
        # One row per date: repeated rows would otherwise multiply in the merge
        rows = combined_df.loc[combined_df['Ticker'] == ticker, ['Date', 'Close']]
        return rows.drop_duplicates(subset='Date').rename(columns={'Close': ticker})

    merged_data = pd.merge(_closes(asset1), _closes(asset2), on='Date', how='inner')

    # Chronological order so the result plots and reads as a time series
    return merged_data.sort_values('Date').set_index('Date')

# Example usage
# Both tickers must exist in the combined price dataset. Alphabet is listed
# there as 'GOOG' ('GOOGL' appears only in the fundamental dataset), and an
# unknown ticker yields an empty frame that cannot be plotted.
asset1 = 'AAPL'
asset2 = 'GOOG'
track_record_data = track_record(asset1, asset2, combined_df)

# Plot the performance track record
track_record_data.plot()
plt.title(f'Performance Track Record: {asset1} vs {asset2}')
plt.xlabel('Date')
plt.ylabel('Close Price')
plt.show()

print(f"Track record of {asset1} against {asset2}:\n{track_record_data}")
```

### 8. Identify and remove duplicate rows from both datasets. How many duplicates were removed?

```python
# Identify and remove duplicates
fundamental_duplicates = fundamental_df.duplicated().sum()

# By this point combined_df carries derived columns (e.g. 'Daily Change',
# 'Previous Close') whose values can differ between a row and its repeated
# copy, so duplicates are detected on the original dataset columns only.
raw_price_columns = ['Date', 'Ticker', 'Open', 'High', 'Low', 'Close', 'Volume']
combined_duplicates = combined_df.duplicated(subset=raw_price_columns).sum()

fundamental_df_clean = fundamental_df.drop_duplicates()
combined_df_clean = combined_df.drop_duplicates(subset=raw_price_columns)

print(f"Removed {fundamental_duplicates} duplicate rows from the fundamental dataset.")
print(f"Removed {combined_duplicates} duplicate rows from the combined dataset.")

### 9. For the fundamental dataset, identify rows where the Market Cap is NaN, and impute these values using a sector-wise median Market Cap.

```python
# Identify the rows whose Market Cap is missing
market_cap_column = 'Market Cap (B)'
missing_market_cap = fundamental_df[market_cap_column].isna()
print(f"Rows with missing Market Cap: {missing_market_cap.sum()}")

# Sector-wise median, computed on de-duplicated rows so that repeated rows in
# the dataset do not weigh twice in the median
sector_median = (
    fundamental_df.drop_duplicates()
    .groupby('Sector')[market_cap_column]
    .median()
)

# Impute NaN Market Cap values using the median of the row's own sector;
# existing values are left untouched
fundamental_df[market_cap_column] = fundamental_df[market_cap_column].fillna(
    fundamental_df['Sector'].map(sector_median)
)

print(f"Fundamental dataset after imputing NaN Market Cap values:\n{fundamental_df}")
```

### 10. Filter the combined dataset to find the top 5 most volatile stocks based on the standard deviation of daily percentage changes.

```python
# Calculate daily percentage change per ticker, in date order.
# A plain pct_change() over the whole frame would mix the last close of one
# ticker with the first close of the next and treat repeated rows as extra
# trading days. Each (Ticker, Date) session is used once; a change is NaN when
# either that day's or the previous day's close is missing.
ordered_sessions = (
    combined_df
    .drop_duplicates(subset=['Ticker', 'Date'])
    .sort_values(['Ticker', 'Date'])
)
previous_close = ordered_sessions.groupby('Ticker')['Close'].shift(1)
# Assignment aligns on the row index; repeated rows are left as NaN
combined_df['Daily Change'] = ordered_sessions['Close'] / previous_close - 1

# Calculate standard deviation of daily percentage changes for each stock
# (NaN changes are ignored by std), then keep the five largest
volatility_df = (
    combined_df.groupby('Ticker', sort=False)['Daily Change']
    .std()
    .rename('Volatility')
    .reset_index()
    .sort_values('Volatility', ascending=False)
    .head(5)
)

# Plot the top 5 most volatile stocks
volatility_df.set_index('Ticker').plot(kind='bar')
plt.title('Top 5 Most Volatile Stocks')
plt.xlabel('Ticker')
plt.ylabel('Volatility (Standard Deviation of Daily Changes)')
plt.show()

print(f"Top 5 most volatile stocks:\n{volatility_df}")
```

