This file is used for testing new bits of code before they are used elsewhere. Please label all code accordingly, and describe what each cell of code is intended to do.

In [None]:
# Correlation Testing - Created 6-23-2024
# Using Pearson Correlation Coefficient First

In [1]:
# Import Python Libraries needed for analysis.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy import stats

In [None]:
# Data Ingestion
# If new data needs to be pulled from Alpha Vantage, use the function av_pull.




# json_to_df -- takes the JSON pulled from Alpha Vantage and converts it into a dataframe. It also adds a title to the column that contains the date and names it "date".
# df_name will be "symbol_df", example TASEKO Mines symbol is TGB, therefore df_name would be TGB_df


     
# The functions contained in this cell pertain to the statistical analysis code, and specifically to correlation testing.
def csv_to_df(file_name, symbol):
    """Load a CSV file into a DataFrame indexed by its parsed 'Date' column.

    Parameters
    ----------
    file_name : str or path-like
        Location of the CSV file. The file must contain a 'Date' column.
    symbol : str
        Key under which the DataFrame is returned (e.g. 'TGB').

    Returns
    -------
    dict
        A one-entry dictionary ``{symbol: DataFrame}``.
    """
    # `parse_dates` has to be passed as a keyword argument; the previous
    # `parse_dates['Date']` subscripted an undefined name and raised NameError.
    df = pd.read_csv(file_name, parse_dates=['Date'], index_col='Date')
    return {symbol: df}

     
     

In [None]:
# This cell creates the (initially empty) dictionary that stores our DataFrames.
# NOTE(review): presumably keyed by ticker symbol, matching the {symbol: df}
# dictionaries returned by csv_to_df -- confirm against the code that fills it.
stock_dfs = {}

In [None]:
# In case our date ranges don't match up

In [None]:
import numpy as np
from scipy.signal import argrelextrema

# Trend analysis on the raw closing prices: mark local maxima/minima, then
# report the average day-to-day change and the average spacing between extrema.
# NOTE: `vrax_df` (a DataFrame with a 'Close' column) must already exist before
# this cell runs; it is not created here.

# Reset the index to convert DateTimeIndex to a regular column.
# The guard keeps the cell re-runnable: a second reset_index() would otherwise
# add an 'index' column that gets renamed into a duplicate 'Date' column.
if 'Date' not in vrax_df.columns:
    vrax_df = vrax_df.reset_index().rename(columns={'index': 'Date'})

# Calculate local maxima and minima (order=5: compare against 5 neighbours on
# each side).  The extrema are computed from vrax_df itself -- the previous
# code read an undefined `df`.
close_prices = vrax_df['Close']
max_positions = argrelextrema(close_prices.values, np.greater_equal, order=5)[0]
min_positions = argrelextrema(close_prices.values, np.less_equal, order=5)[0]
# Assignment aligns on the index, so every non-extremum row is left as NaN.
vrax_df['Local_Max'] = close_prices.iloc[max_positions]
vrax_df['Local_Min'] = close_prices.iloc[min_positions]

# Calculate the average rate of change
vrax_df['Change'] = close_prices.diff()
average_rate_of_change = vrax_df['Change'].mean()

# Calculate the duration of trends (number of data points between local maxima and minima).
# how='all' drops only rows that are neither a maximum nor a minimum; the
# default how='any' kept only rows that were both, which left almost nothing.
local_extrema_indices = sorted(
    vrax_df.dropna(subset=['Local_Max', 'Local_Min'], how='all').index
)
if len(local_extrema_indices) > 1:
    durations = np.diff(local_extrema_indices)
    average_duration = durations.mean()
else:
    average_duration = np.nan  # or handle appropriately if no durations are found

print("Average Rate of Change:", average_rate_of_change)
print("Average Duration of Trends:", average_duration)

# Display the DataFrame
# info() prints its report itself and returns None, so it is not wrapped in print().
vrax_df.info()
print(vrax_df.head())

In [None]:
import matplotlib.pyplot as plt

# Plot the closing price and overlay the detected local maxima/minima.
# Uses the explicit Figure/Axes interface instead of the implicit pyplot
# state machine, so every call names the axes it draws on.
# Requires vrax_df to already hold the 'Date', 'Close', 'Local_Max' and
# 'Local_Min' columns.
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(vrax_df['Date'], vrax_df['Close'], label='Close Price', color='blue')

# Plot local maxima
ax.scatter(vrax_df['Date'], vrax_df['Local_Max'], label='Local Max', color='green', marker='^', alpha=1)

# Plot local minima
ax.scatter(vrax_df['Date'], vrax_df['Local_Min'], label='Local Min', color='red', marker='v', alpha=1)

ax.set_title('Stock Prices with Local Maxima and Minima')
ax.set_xlabel('Date')
ax.set_ylabel('Close Price')
ax.legend()
plt.show()

In [None]:
import pandas as pd
import numpy as np
from scipy.signal import argrelextrema, savgol_filter
import matplotlib.pyplot as plt

# Smoothed trend analysis: smooth the closing prices (moving average and
# Savitzky-Golay), locate local extrema on the Savitzky-Golay curve, report the
# average change and average spacing between extrema, then plot everything.

# Sample data: replace this with your actual stock data.
# A seeded generator makes the random walk -- and therefore every number and
# plot below -- reproducible from run to run.
rng = np.random.default_rng(42)
vrax_df = pd.DataFrame({
    'Close': rng.standard_normal(100).cumsum()
}, index=pd.date_range(start='1/1/2020', periods=100))

# Reset the index to convert DateTimeIndex to a regular column
vrax_df = vrax_df.reset_index().rename(columns={'index': 'Date'})

# Smooth the data using a moving average.
# The two smoothers get separate window names; previously one `window_size`
# variable was silently reassigned between them.
ma_window_size = 5
vrax_df['Close_MA'] = vrax_df['Close'].rolling(window=ma_window_size).mean()

# Smooth the data using a Savitzky-Golay filter
window_size = 11  # window size should be odd
poly_order = 2  # polynomial order
vrax_df['Close_SG'] = savgol_filter(vrax_df['Close'], window_size, poly_order)

# Calculate local maxima and minima on the smoothed data (using Savitzky-Golay filter as an example)
smoothed = vrax_df['Close_SG']
max_positions = argrelextrema(smoothed.values, np.greater_equal, order=5)[0]
min_positions = argrelextrema(smoothed.values, np.less_equal, order=5)[0]
# Assignment aligns on the index, so every non-extremum row is left as NaN.
vrax_df['Local_Max'] = smoothed.iloc[max_positions]
vrax_df['Local_Min'] = smoothed.iloc[min_positions]

# Calculate the average rate of change
vrax_df['Change'] = smoothed.diff()
average_rate_of_change = vrax_df['Change'].mean()

# Calculate the duration of trends (number of data points between local maxima and minima).
# how='all' drops only rows that are neither a maximum nor a minimum; the
# default how='any' kept only rows that were both, which left almost nothing.
local_extrema_indices = sorted(
    vrax_df.dropna(subset=['Local_Max', 'Local_Min'], how='all').index
)
if len(local_extrema_indices) > 1:
    durations = np.diff(local_extrema_indices)
    average_duration = durations.mean()
else:
    average_duration = np.nan  # or handle appropriately if no durations are found

print("Average Rate of Change:", average_rate_of_change)
print("Average Duration of Trends:", average_duration)

# Plotting the data (explicit Figure/Axes interface)
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(vrax_df['Date'], vrax_df['Close'], label='Close Price', color='blue', alpha=0.5)
ax.plot(vrax_df['Date'], vrax_df['Close_MA'], label='Moving Average', color='orange')
ax.plot(vrax_df['Date'], vrax_df['Close_SG'], label='Savitzky-Golay Filter', color='green')

# Plot local maxima and minima
ax.scatter(vrax_df['Date'], vrax_df['Local_Max'], label='Local Max (SG)', color='red', marker='^')
ax.scatter(vrax_df['Date'], vrax_df['Local_Min'], label='Local Min (SG)', color='purple', marker='v')

ax.set_title('Stock Prices with Smoothing')
ax.set_xlabel('Date')
ax.set_ylabel('Close Price')
ax.legend()
plt.show()


In [None]:
## The Below Code Is For Testing Web Scraping

In [2]:
## Web Scrape

import requests
from bs4 import BeautifulSoup

In [3]:
# Fetch the Yahoo Finance quote page for a single ticker symbol.
symbol = 'TGB'
my_url = f'https://finance.yahoo.com/quote/{symbol}'

# requests applies no timeout by default, so an unresponsive server would
# block this cell forever; give up after 10 seconds instead.
response = requests.get(my_url, timeout=10)

In [12]:
# Report whether the request succeeded, together with its HTTP status code.
print("response.ok : {} , response.status_code : {}".format(response.ok , response.status_code))

response.ok : True , response.status_code : 200


In [13]:
# Show only the first 500 characters of the response body as a quick sanity check.
print("Preview of response.text : ", response.text[:500])

Preview of response.text :  <!doctype html>
<html lang="en-US" theme="light" data-color-scheme="light" class="desktop neo-green dock-upscale failsafe">
    <head>
        <meta charset="utf-8">
        <meta name="viewport" content="width=device-width, initial-scale=1">
        <meta name="oath:guce:consent-host" content="guce.yahoo.com">
        
		<link href="../../assets/_app/immutable/assets/2.CaHVP_4h.css" rel="stylesheet">
		<link href="../../assets/_app/immutable/assets/Ads.3RMLh2mX.css" rel="stylesheet">
		<link hr


In [5]:
def get_page(url, timeout=10):
    """Download a webpage and return a beautiful soup doc.

    Parameters
    ----------
    url : str
        Address of the page to download.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 10).
        Without it, requests would wait indefinitely on a stalled connection.

    Returns
    -------
    BeautifulSoup
        The response body parsed with the built-in 'html.parser'.

    Raises
    ------
    Exception
        If the response status is not OK; the status code is printed first.
    """
    response = requests.get(url, timeout=timeout)
    if not response.ok:
        print('Status code:', response.status_code)
        raise Exception('Failed to load page {}'.format(url))
    return BeautifulSoup(response.text, 'html.parser')



In [6]:
# Download the quote page for the chosen ticker and parse it.
symbol = 'TGB'
my_url = f'https://finance.yahoo.com/quote/{symbol}'
doc = get_page(my_url)

# Keep the page's <title> tag and every <div class="content"> element;
# later cells read the news entries out of div_tags.
title = doc.find('title')
div_tags = doc.find_all('div', attrs={'class': "content"})

print(title)



NameError: name 'requests' is not defined

In [17]:
# This code searches the div_tags to find each article and title.
# Only the first entry is inspected.  Every lookup is guarded because
# find() returns None when a tag is absent and div_tags may be empty; the
# unguarded version raised IndexError/AttributeError in those cases.
if not div_tags:
    print("No articles found")
else:
    article = div_tags[0]

    source_tag = article.find('div', {'class': "publishing"})
    print("Source: {}".format(source_tag.text if source_tag is not None else 'No source found'))

    headline_tag = article.find('h3', {'class': "clamp"})
    print("Headline:", headline_tag.text if headline_tag is not None else 'No headline found')

    # An <a> tag without an href attribute is treated the same as no link.
    link_tag = article.find('a')
    link_href = link_tag.get('href') if link_tag is not None else None
    if link_href:
        print("Link: {}".format(link_href))
    else:
        print("No link found")

Source: Reuters • 7 days ago
Headline: Canada's Taseko shares fall 8% after strike at Gibraltar mine
Link: https://finance.yahoo.com/news/canadas-taseko-shares-fall-8-183903487.html


In [7]:
# doc is a beautifulSoup doc item that is created in the get_page function
def get_news_data(url):
    """Return the source, headline and link of the first news entry on a page.

    The page is downloaded through ``get_page`` so that a failed HTTP request
    raises immediately instead of an error page being parsed.

    Parameters
    ----------
    url : str
        Address of the page to scrape.

    Returns
    -------
    dict
        Keys 'source', 'headline' and 'link'.  A value that cannot be found
        is replaced by 'No source found', 'No headline found' or
        'No link found' respectively.

    Raises
    ------
    Exception
        Propagated from ``get_page`` when the page cannot be loaded.
    """
    doc = get_page(url)

    # First <div class="content"> element, or None when the page has none.
    div_tags = doc.find_all('div', {'class': "content"})
    article = div_tags[0] if div_tags else None

    # Initialize a dictionary to store the data
    data = {}

    # Extract the source
    source_tag = article.find('div', {'class': "publishing"}) if article is not None else None
    data['source'] = source_tag.text if source_tag is not None else 'No source found'

    # Extract the headline (an empty headline also counts as missing)
    headline_tag = article.find('h3', {'class': "clamp"}) if article is not None else None
    headline = headline_tag.text if headline_tag is not None else ''
    data['headline'] = headline if headline else 'No headline found'

    # Extract the link.  Look inside the article first so the link belongs to
    # the same entry as the source/headline; otherwise fall back to the first
    # matching link anywhere on the page, which is what was searched before.
    link = article.find('a', {'class': 'subtle-link'}) if article is not None else None
    if link is None:
        link = doc.find('a', {'class': 'subtle-link'})
    # .get() avoids a KeyError when the tag carries no href attribute.
    href = link.get('href') if link is not None else None
    data['link'] = href if href else 'No link found'

    return data

In [8]:
# Scrape the page at my_url; as the last expression in the cell, the returned
# dictionary is displayed as the cell output.
get_news_data(my_url)


NameError: name 'requests' is not defined