<a href="https://colab.research.google.com/github/joshua-hill/minvest-portfolio/blob/main/Minvest_Backend_Task.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import time
import pandas as pd
import requests
from bs4 import BeautifulSoup
from datetime import datetime
import os

# Read the Alpha Vantage key from the environment. os.getenv is a function and
# must be called; subscripting it raises TypeError. The result is None when the
# variable is not set.
api_key = os.getenv('ALPHAVANTAGE-API-KEY')



In [2]:


# Function to scrape DJIA stock symbols from Wikipedia
def scrape_djia_stocks(url, timeout=10):
    """Scrape ticker symbols from the components table of a web page.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        A list of symbol strings, or None when the download fails or the
        expected table is not present (a message is printed in both cases).
    """
    try:
        # Download the page and fail loudly on a non-2xx status
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        # The table of interest carries the class 'wikitable sortable'
        table = soup.find('table', {'class': 'wikitable sortable'})
        if table is None:
            print("An error occurred: no 'wikitable sortable' table found on the page.")
            return None

        # The symbol is taken from the second <td> cell (index 1) of each row;
        # the header row and rows with fewer than two <td> cells are skipped.
        stocks = []
        for row in table.find_all('tr')[1:]:
            cells = row.find_all('td')
            if len(cells) > 1:
                stocks.append(cells[1].text.strip())

        return stocks

    except requests.HTTPError as http_err:
        print(f"HTTP error occurred: {http_err}")
    except Exception as err:
        # Best-effort scrape: report anything else (network, parsing) and fall through
        print(f"An error occurred: {err}")

    # Reached only after one of the handlers above has reported a failure
    return None

# Wikipedia article that lists the DJIA components
djia_url = 'https://en.wikipedia.org/wiki/Dow_Jones_Industrial_Average'

# Pull the ticker symbols from that article
djia_stocks = scrape_djia_stocks(djia_url)

# Report the outcome of the scrape
if not djia_stocks:
    print("Failed to retrieve DJIA stocks.")
else:
    print("DJIA Stocks:", djia_stocks)


DJIA Stocks: ['MMM', 'AXP', 'AMGN', 'AAPL', 'BA', 'CAT', 'CVX', 'CSCO', 'KO', 'DIS', 'DOW', 'GS', 'HD', 'HON', 'IBM', 'INTC', 'JNJ', 'JPM', 'MCD', 'MRK', 'MSFT', 'NKE', 'PG', 'CRM', 'TRV', 'UNH', 'VZ', 'V', 'WBA', 'WMT']


In [3]:
def get_user_portfolio(djia_stocks):
    """Interactively build a portfolio of DJIA holdings.

    The user is asked for a space-separated list of symbols (case-insensitive)
    and is re-prompted until at least one symbol is given and every symbol is
    in ``djia_stocks``. A strictly positive whole number of shares is then
    requested for each distinct symbol.

    Args:
        djia_stocks: Iterable of valid ticker symbols (upper-case strings).

    Returns:
        dict mapping each chosen symbol to the number of shares held.
    """
    print("Please enter the stock symbols from the DJIA that you want to include in your portfolio:")
    print("Available symbols:", ' '.join(djia_stocks))

    valid_symbols = set(djia_stocks)

    while True:
        entered = input("Enter stock symbols separated by space: ").upper().split()
        # Drop repeated symbols but keep the order they were typed in, so the
        # user is never asked twice about the same holding.
        user_symbols = list(dict.fromkeys(entered))

        # An empty answer would otherwise produce an empty portfolio
        if not user_symbols:
            print("Please enter at least one stock symbol.")
            continue

        invalid_symbols = [symbol for symbol in user_symbols if symbol not in valid_symbols]
        if invalid_symbols:
            print("These symbols are not valid DJIA stocks:", ' '.join(invalid_symbols))
            continue

        break

    portfolio = {}
    for symbol in user_symbols:
        while True:
            try:
                count = int(input(f"Enter the number of stocks you hold for {symbol}: "))
            except ValueError:
                print("That's not a valid number. Please try again.")
                continue
            # Zero is rejected as well: the message asks for a positive number,
            # and a zero-share holding carries no weight in the portfolio.
            if count <= 0:
                print("Please enter a positive number.")
                continue
            portfolio[symbol] = count
            break

    return portfolio

# Build the portfolio interactively. djia_stocks is the list of symbols
# produced by the Wikipedia scraping cell earlier in this notebook.
user_portfolio = get_user_portfolio(djia_stocks)
print("Your portfolio:", user_portfolio)


Please enter the stock symbols from the DJIA that you want to include in your portfolio:
Available symbols: MMM AXP AMGN AAPL BA CAT CVX CSCO KO DIS DOW GS HD HON IBM INTC JNJ JPM MCD MRK MSFT NKE PG CRM TRV UNH VZ V WBA WMT
Enter stock symbols separated by space: MMM AXP
Enter the number of stocks you hold for MMM: 3
Enter the number of stocks you hold for AXP: 6
Your portfolio: {'MMM': 3, 'AXP': 6}


In [4]:

# Function to prompt the user for a date range
def get_date_range():
    """Prompt for a start and end date in DD-MM-YYYY format.

    An empty or whitespace-only answer selects the default for that prompt.

    Returns:
        (start_date, end_date) as datetime objects when both parse and the
        start is strictly before the end; otherwise (None, None) after
        printing the reason.
    """
    # Local import: a later cell in this notebook runs `import datetime`,
    # which rebinds the global name to the module. Importing here keeps the
    # function working whichever cell ran last.
    from datetime import datetime as _datetime

    # Define the default date range
    default_start_date = "01-01-2020"
    default_end_date = "01-01-2023"

    # Prompt for both dates; surrounding whitespace is ignored so that an
    # answer of only spaces falls back to the default.
    start_date_str = input(f"Enter the start date (DD-MM-YYYY) or press Enter to use the default ({default_start_date}): ").strip()
    end_date_str = input(f"Enter the end date (DD-MM-YYYY) or press Enter to use the default ({default_end_date}): ").strip()

    start_date_str = start_date_str or default_start_date
    end_date_str = end_date_str or default_end_date

    def validate_date(date_str):
        """Parse a DD-MM-YYYY string; print a message and return None on failure."""
        try:
            return _datetime.strptime(date_str, "%d-%m-%Y")
        except ValueError:
            print(f"Date {date_str} is not in the correct format (DD-MM-YYYY).")
            return None

    # Validate the start date
    start_date = validate_date(start_date_str)
    if start_date is None:
        return None, None

    # Validate the end date
    end_date = validate_date(end_date_str)
    if end_date is None:
        return None, None

    # The range must be non-empty and correctly ordered
    if start_date >= end_date:
        print("The start date must be before the end date.")
        return None, None

    return start_date, end_date

while True:
    # Keep asking until get_date_range hands back a usable pair
    start_date, end_date = get_date_range()
    if not (start_date and end_date):
        print("Invalid date range provided.")
        continue

    # Echo the accepted range and stop prompting
    print(f"Start Date: {start_date.strftime('%d-%m-%Y')}")
    print(f"End Date: {end_date.strftime('%d-%m-%Y')}")
    break







Enter the start date (DD-MM-YYYY) or press Enter to use the default (01-01-2020): 
Enter the end date (DD-MM-YYYY) or press Enter to use the default (01-01-2023): 
Start Date: 01-01-2020
End Date: 01-01-2023


In [15]:
def inspect_api_response(symbol, api_key, timeout=10):
    """Debug helper: print the raw Alpha Vantage response for one symbol.

    Args:
        symbol: Ticker symbol to query.
        api_key: Alpha Vantage API key.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        None. The decoded JSON (or the raw body if it is not JSON) is printed.
    """
    # NOTE(review): the output captured under this cell shows the API answering
    # this function name with a "premium endpoint" notice for the key in use.
    base_url = "https://www.alphavantage.co/query"
    params = {
        "function": "TIME_SERIES_DAILY_ADJUSTED",
        "symbol": symbol,
        "apikey": api_key
    }

    response = requests.get(base_url, params=params, timeout=timeout)
    try:
        payload = response.json()
    except ValueError:
        # Body was not JSON: show it verbatim rather than crash, since the
        # whole point of this helper is to see what came back.
        payload = response.text
    print(f"Response for {symbol}: {payload}")  # Print the raw response


Response for DNFHHDHDHDHHFHD: {'Information': 'Thank you for using Alpha Vantage! This is a premium endpoint. You may subscribe to any of the premium plans at https://www.alphavantage.co/premium/ to instantly unlock all premium endpoints'}
Response for AXP: {'Information': 'Thank you for using Alpha Vantage! This is a premium endpoint. You may subscribe to any of the premium plans at https://www.alphavantage.co/premium/ to instantly unlock all premium endpoints'}


In [None]:
import datetime

def fetch_historical_data(stock_symbols, start_date, end_date, api_key,
                          pause_seconds=12, timeout=30, session=None):
    """Download daily quotes from Alpha Vantage and keep those in a date range.

    Args:
        stock_symbols: Iterable of ticker symbols. A dict works too, because
            iterating it yields its keys.
        start_date: First day to keep (date or datetime), inclusive.
        end_date: Last day to keep (date or datetime), inclusive.
        api_key: Alpha Vantage API key.
        pause_seconds: Delay inserted between consecutive requests to stay
            under the API rate limit. The default matches the delay this
            function has always used; 0 or None disables the pause.
        timeout: Seconds to wait for each HTTP response.
        session: Optional object exposing a requests-compatible ``get``
            (for example a ``requests.Session``); defaults to ``requests``.

    Returns:
        pandas.DataFrame with one column per symbol that returned data and one
        row per date string; each cell is that day's quote dict as delivered
        by the API. Symbols that fail are reported via print and omitted.
    """
    # Local import under a private alias, so the function does not depend on
    # whether the notebook-level name `datetime` currently refers to the
    # module or to the class.
    import datetime as _dt

    if isinstance(start_date, _dt.datetime):
        start_date = start_date.date()

    if isinstance(end_date, _dt.datetime):
        end_date = end_date.date()

    http = session if session is not None else requests
    base_url = "https://www.alphavantage.co/query"
    historical_data = {}

    for position, symbol in enumerate(stock_symbols):
        # Pause before every request except the first. Doing it here instead of
        # at the end of the success path means failed or rejected calls are
        # throttled too, and no time is wasted after the final symbol.
        if position and pause_seconds and pause_seconds > 0:
            time.sleep(pause_seconds)

        params = {
            "function": "TIME_SERIES_DAILY",
            "symbol": symbol,
            "outputsize": "full",
            "apikey": api_key
        }

        try:
            response = http.get(base_url, params=params, timeout=timeout)
            response.raise_for_status()

            try:
                data = response.json()
            except ValueError as json_err:
                print(f"JSON parsing error for symbol {symbol}: {json_err}")
                continue

            time_series = data.get("Time Series (Daily)")

            if not time_series:
                print(f"No data found for symbol: {symbol}")
                # Surface any plain-text fields in the payload: they carry the
                # API's own explanation of why no series was returned.
                for key, value in data.items():
                    if isinstance(value, str):
                        print(f"  {key}: {value}")
                continue

            # Keep only the entries within the specified date range (inclusive)
            historical_data[symbol] = {
                day: values
                for day, values in time_series.items()
                if start_date <= _dt.datetime.strptime(day, "%Y-%m-%d").date() <= end_date
            }

        except requests.HTTPError as http_err:
            print(f"HTTP error occurred for symbol {symbol}: {http_err}")
        except KeyError as key_err:
            print(f"Data format error for symbol {symbol}: {key_err}")
        except Exception as err:
            # Best-effort per symbol: report and move on to the next one
            print(f"An error occurred for symbol {symbol}: {err}")

    combined_data = pd.DataFrame(historical_data)

    return combined_data

# Fetch daily quotes for the portfolio over the chosen date range.
# user_portfolio is a dict; the function iterates it, which yields its keys
# (the ticker symbols).
combined_data = fetch_historical_data(user_portfolio, start_date, end_date, api_key)


In [None]:
def calculate_portfolio_metrics(df, portfolio):
    """Compute mean, volatility and cumulative return of a portfolio.

    Args:
        df: DataFrame with one column per symbol and one row per date. Each
            cell is a quote dict containing a '4. close' entry, as produced by
            fetch_historical_data; missing cells (NaN) are tolerated. The
            index must sort chronologically (ISO 'YYYY-MM-DD' strings do).
            The frame passed in is not modified.
        portfolio: dict mapping symbol -> number of shares held.

    Returns:
        (mean_daily_return, std_dev_daily_returns, total_cumulative_return),
        or None after printing a message when a step fails or when fewer than
        two dated rows are available (no return can be formed).

    Notes:
        Each symbol's daily return is weighted by its share count divided by
        the total share count of ``portfolio``. Columns for symbols that are
        not in ``portfolio`` are ignored.
        NOTE(review): weighting by share count rather than by market value is
        kept from the original implementation - confirm it is intended.
    """
    def _closing_price(cell):
        """Return the close as a float, or NaN when the quote is missing."""
        if isinstance(cell, dict):
            return float(cell['4. close'])
        if pd.isna(cell):
            return float('nan')
        raise TypeError(f"unexpected quote value: {cell!r}")

    try:
        # Sort by date instead of blindly reversing, so the result does not
        # depend on the row order the caller happens to supply.
        ordered = df.sort_index()
    except Exception as e:
        print("Error sorting DataFrame by date:", e)
        return None

    try:
        # Build a fresh numeric frame rather than assigning into a slice of
        # the input; carry the last known close forward over gaps.
        closes = pd.DataFrame(
            {symbol: ordered[symbol].map(_closing_price) for symbol in ordered.columns},
            index=ordered.index,
        ).ffill()
    except Exception as e:
        print("Error transforming DataFrame:", e)
        return None

    try:
        # Calculate daily returns
        daily_returns = closes.pct_change()
    except Exception as e:
        print("Error calculating daily returns:", e)
        return None

    try:
        # Calculate the total number of shares in the portfolio
        total_shares = sum(portfolio.values())

        held = [symbol for symbol in daily_returns.columns if symbol in portfolio]
        weights = pd.Series(
            {symbol: portfolio[symbol] / total_shares for symbol in held},
            dtype=float,
        )
        weighted_daily_returns = daily_returns[held].mul(weights, axis=1)

        # min_count=1 leaves rows without any valid return as NaN instead of
        # turning them into 0.0. That covers the first date, which has no
        # previous close; dropping those rows keeps a fabricated zero return
        # out of the mean and the standard deviation.
        portfolio_daily_returns = weighted_daily_returns.sum(axis=1, min_count=1).dropna()
    except Exception as e:
        print("Error calculating weighted daily returns:", e)
        return None

    if portfolio_daily_returns.empty:
        print("Cumulative returns DataFrame is empty.")
        return None

    try:
        # Calculate Mean Daily Return
        mean_daily_return = portfolio_daily_returns.mean()
    except Exception as e:
        print("Error calculating mean daily return:", e)
        return None

    try:
        # Calculate Standard Deviation of Daily Returns
        std_dev_daily_returns = portfolio_daily_returns.std()
    except Exception as e:
        print("Error calculating standard deviation of daily returns:", e)
        return None

    try:
        # Compound the daily returns; the last value is the total return
        cumulative_returns = (1 + portfolio_daily_returns).cumprod() - 1
        total_cumulative_return = cumulative_returns.iloc[-1]
    except Exception as e:
        print("Error calculating cumulative returns:", e)
        return None

    return mean_daily_return, std_dev_daily_returns, total_cumulative_return

# Output
try:
    # Work on a copy so the fetched data stays available for re-runs
    df_copy = combined_data.copy()
    metrics = calculate_portfolio_metrics(df_copy, user_portfolio)
    if metrics is None:
        # The function returns None after printing its own diagnostic;
        # unpacking None here would only add a misleading "cannot unpack" error.
        print("Portfolio metrics could not be calculated.")
    else:
        mean_daily_return, std_dev_daily_returns, total_cumulative_return = metrics
        print("Mean Daily Return:", mean_daily_return)
        print("Standard Deviation of Daily Returns:", std_dev_daily_returns)
        print("Cumulative Return:", total_cumulative_return)
except Exception as e:
    print("Error in portfolio metrics calculation:", e)
