In [1]:
import pandas as pd

In [44]:
import yfinance as yf
import pandas as pd
from datetime import datetime
from meteostat import Point, Daily
from functools import reduce

def fetch_stock_data(tickers, start_year=2023, end_year=2024):
    all_years_data = []
    for ticker in tickers:
        for year in range(start_year, end_year + 1):
            start_date, end_date = f"{year}-01-01", f"{year}-12-31"
            stock = yf.Ticker(ticker)
            hist = stock.history(start=start_date, end=end_date)
            if not hist.empty:
                hist.reset_index(inplace=True)
                hist['Ticker'] = ticker
                all_years_data.append(hist[['Date', 'Ticker', 'Open', 'Close', 'High', 'Low', 'Volume']])
    return pd.concat(all_years_data, ignore_index=True)

def fetch_weather_data(city_points, start_date, end_date):
    weather_frames = []
    for city_name, point in city_points.items():
        weather = Daily(point, start_date, end_date).fetch().reset_index()
        weather.rename(columns={'time': 'Date', 'tavg': f'tavg_{city_name}'}, inplace=True)
        weather = weather[['Date', f'tavg_{city_name}']]
        weather_frames.append(weather)
    return reduce(lambda left, right: pd.merge(left, right, on='Date', how='outer'), weather_frames)

# Define cities for weather data
cities = {
    'NewYork': Point(40.7128, -74.0060),   # New York City, USA
    'London': Point(51.5074, -0.1278),      # London, UK
    'Paris': Point(48.8566, 2.3522),        # Paris, France
    'Tokyo': Point(35.6895, 139.6917),      # Tokyo, Japan
    'Sydney': Point(-33.8688, 151.2093),     # Sydney, Australia
    'Berlin': Point(52.5200, 13.4050),       # Berlin, Germany
    'Beijing': Point(39.9042, 116.4074),    # Beijing, China
    'Moscow': Point(55.7558, 37.6176),       # Moscow, Russia
    'Zurich': Point(47.3769, 8.5417),        # Zurich, Switzerland
    'Madrid': Point(40.4168, -3.7038),        # Madrid, Spain
    'Toronto': Point(43.65107, -79.347015),  # Toronto, Canada
    'Vienna': Point(48.20849, 16.37208),     # Vienna, Austria

}

# Fetching stock and weather data
combined_stock_data = fetch_stock_data(['AMZN', 'AAPL', 'NFLX', 'GOOGL'])
weather_data = fetch_weather_data(cities, datetime(2023, 1, 1), datetime(2023, 12, 31))

# Merging data on 'Date'
final_data = pd.concat([combined_stock_data, weather_data], axis=1, join='inner')
#keep only one column for date
final_data = final_data.loc[:,~final_data.columns.duplicated()]
#print amount of nan values
print(final_data.isna().sum())

final_data.head()


Date            0
Ticker          0
Open            0
Close           0
High            0
Low             0
Volume          0
tavg_NewYork    0
tavg_London     0
tavg_Paris      0
tavg_Tokyo      0
tavg_Sydney     0
tavg_Berlin     0
tavg_Beijing    0
tavg_Moscow     0
tavg_Zurich     0
tavg_Madrid     0
tavg_Toronto    0
tavg_Vienna     0
dtype: int64


Unnamed: 0,Date,Ticker,Open,Close,High,Low,Volume,tavg_NewYork,tavg_London,tavg_Paris,tavg_Tokyo,tavg_Sydney,tavg_Berlin,tavg_Beijing,tavg_Moscow,tavg_Zurich,tavg_Madrid,tavg_Toronto,tavg_Vienna
0,2023-01-03 00:00:00-05:00,AMZN,85.459999,85.82,86.959999,84.209999,76706000,8.1,9.3,13.1,6.5,23.3,13.9,-1.8,3.2,11.0,8.3,3.9,6.4
1,2023-01-04 00:00:00-05:00,AMZN,86.550003,85.139999,86.980003,83.360001,68885100,7.5,5.6,10.8,6.3,24.2,12.1,-4.8,0.5,7.5,8.7,4.6,5.6
2,2023-01-05 00:00:00-05:00,AMZN,85.330002,83.120003,85.419998,83.07,67930800,10.2,8.0,7.7,5.8,24.2,6.2,-5.7,-0.4,7.0,7.1,3.6,7.5
3,2023-01-06 00:00:00-05:00,AMZN,83.029999,86.080002,86.400002,81.43,83303400,13.0,11.5,11.8,5.6,19.5,6.4,-4.4,-2.1,7.5,5.3,3.5,7.8
4,2023-01-09 00:00:00-05:00,AMZN,87.459999,87.360001,89.480003,87.080002,65266100,10.9,10.1,12.8,6.0,18.3,7.9,-3.3,-6.1,9.8,4.9,4.0,12.4
