In [26]:
import pandas as pd
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import statsmodels.api as sm
from datetime import datetime, timedelta
import yfinance as yf

In [33]:
#Total movement 10/8 - 10/18
def get_stock_movement(ticker, start_date, end_date):
    try:
        # Adjust the end_date to include the intended day
        end_date_adjusted = (datetime.strptime(end_date, "%Y-%m-%d") + timedelta(days=1)).strftime("%Y-%m-%d")

        stock_data = yf.download(ticker, start=start_date, end=end_date_adjusted, progress=False)

        if stock_data.empty:
            return {"error": f"No data found for {ticker} between {start_date} and {end_date}"}

        opening_price = stock_data['Open'].iloc[0]
        closing_price = stock_data['Close'].iloc[-1]
        peak_price = stock_data['High'].max()

        percentage_total = ((closing_price - opening_price) / opening_price) * 100

        return {
            "ticker": ticker,
            "10/8-10/18_total": round(percentage_total, 2)  # Rounded to two decimal places
        }

    except Exception as e:
        return {"error": str(e)}

# List of tickers
tickers = ['CELZ', 'UPC', 'ACON', 'SCPX', 'SONN', 'REVB', 'SXTC', 'CYTO', 'PBM', 'PRFX', 'YCBD', 'TIVC', 
'HSDT', 'GCTK', 'SLRX', 'AVGR', 'AKAN', 'DRMA', 'AKAN', 'DRMA', 'BACK', 'CYCC', 'NAOV', 'NDRA', 'BJDX', 
'ZVSA', 'VRPX', 'VERO', 'BIOR', 'QLGN', 'XYLO', 'SHPH', 'MTEM', 'NBY', 'SPRC', 'BBLG', 'APVO', 'BPTH',
'VTAK', 'SCNI', 'MTNB','PHIO', 'ATIP', 'PALI', 'FRES', 'LGMK', 'WINT', 'TTNP', 'LIPO', 'TCRT', 'THAR', 
'TOVX', 'ISPC', 'KTTA', 'QNRX', 'ONCO', 'ATXI', 'PTPI', 'RSLS', 'PTIX', 'AZTR', 'SISI', 'ENVB', 'CPHI',
'SXTP', 'STSS', 'ARTL', 'SNPX', 'PCSA', 'HSCS', 'CHEK', 'INM', 'SNOA', 'ATNF', 'XRTX', 'APM', 'SINT'] 
           

# Start and end dates
start_date = "2024-10-08"
end_date = "2024-10-18"

# Initialize empty list to hold the data
data = []

# Loop through tickers
for ticker in tickers:
    result = get_stock_movement(ticker, start_date, end_date)
    if "error" in result:
        print(f"Error for {ticker}: {result['error']}")
    else:
        data.append(result)

# Create DataFrame
celz_comp_df = pd.DataFrame(data)

# Display the DataFrame with the specified columns
celz_comp_df = celz_comp_df[['ticker', '10/8-10/18_total']].astype({'ticker': str, '10/8-10/18_total': float})

print(celz_comp_df)


   ticker  10/8-10/18_total
0    CELZ             51.03
1     UPC            111.17
2    ACON             -0.56
3    SCPX              7.89
4    SONN             21.11
..    ...               ...
72   SNOA             19.25
73   ATNF            264.49
74   XRTX             23.68
75    APM            -10.00
76   SINT              3.83

[77 rows x 2 columns]


  celz_comp_df = celz_comp_df[['ticker', '10/8-10/18_total']].astype({'ticker': str, '10/8-10/18_total': float})


In [34]:
import numpy as np
from scipy import stats

just_celz_total = celz_comp_df.loc[celz_comp_df['ticker'] == 'CELZ', '10/8-10/18_total'].values[0]
rest_totals = celz_comp_df.loc[celz_comp_df['ticker'] != 'CELZ', '10/8-10/18_total']

##Perplexity version
def hodges_lehmann_interval(just_celz_total, rest_totals, alpha=0.05):
    if np.isscalar(just_celz_total):
        just_celz_total = [just_celz_total]
    if np.isscalar(rest_totals):
        rest_totals = [rest_totals]

    pairwise_differences = [x1 - x2 for x1 in just_celz_total for x2 in rest_totals]
    pairwise_differences.sort()

    hl_estimator = np.median(pairwise_differences)

    n1, n2 = len(just_celz_total), len(rest_totals)
    n = n1 * n2

    # Calculate the critical value
    z = stats.norm.ppf(1 - alpha/2)

    # Calculate the standard error
    se = np.sqrt((n1 + n2 + 1) / (12 * n1 * n2))

    # Calculate the confidence interval
    margin = z * se * np.sqrt(n)
    lower_rank = int(np.ceil((n - margin) / 2) - 1)
    upper_rank = int(np.floor((n + margin) / 2) - 1)

    lower_bound = pairwise_differences[max(0, lower_rank)]
    upper_bound = pairwise_differences[min(n - 1, upper_rank)]

    return {
        "Hodges-Lehmann Estimator": hl_estimator,
        "Confidence Interval": (lower_bound, upper_bound)
    }

# The rest of your code remains the same
u_stat, p_value = stats.mannwhitneyu(just_celz_total, rest_totals, alternative='two-sided')

result = hodges_lehmann_interval(just_celz_total, rest_totals, alpha=0.05)
print(f'Mann Whitney U Statistics: {u_stat}')
print(f'p value: {p_value:.4f}')
print(f"Hodges-Lehmann Estimator: {result['Hodges-Lehmann Estimator']}")
print(f"Confidence Interval: {result['Confidence Interval']}")


Mann Whitney U Statistics: 72.0
p value: 0.1317
Hodges-Lehmann Estimator: 47.07
Confidence Interval: (np.float64(46.27), np.float64(47.550000000000004))


In [35]:
#Peak movement 10/8 - 10/18
def get_stock_movement(ticker, start_date, end_date):
    try:
        # Adjust the end_date to include the intended day
        end_date_adjusted = (datetime.strptime(end_date, "%Y-%m-%d") + timedelta(days=1)).strftime("%Y-%m-%d")

        stock_data = yf.download(ticker, start=start_date, end=end_date_adjusted, progress=False)

        if stock_data.empty:
            return {"error": f"No data found for {ticker} between {start_date} and {end_date}"}

        opening_price = stock_data['Open'].iloc[0]
        peak_price = stock_data['High'].max()

        percentage_total = ((peak_price - opening_price) / opening_price) * 100

        return {
            "ticker": ticker,
            "10/8-10/18_peak": round(percentage_total, 2)  # Rounded to two decimal places
        }

    except Exception as e:
        return {"error": str(e)}

# List of tickers
tickers = ['CELZ', 'UPC', 'ACON', 'SCPX', 'SONN', 'REVB', 'SXTC', 'CYTO', 'PBM', 'PRFX', 'YCBD', 'TIVC', 
'HSDT', 'GCTK', 'SLRX', 'AVGR', 'AKAN', 'DRMA', 'AKAN', 'DRMA', 'BACK', 'CYCC', 'NAOV', 'NDRA', 'BJDX', 
'ZVSA', 'VRPX', 'VERO', 'BIOR', 'QLGN', 'XYLO', 'SHPH', 'MTEM', 'NBY', 'SPRC', 'BBLG', 'APVO', 'BPTH',
'VTAK', 'SCNI', 'MTNB','PHIO', 'ATIP', 'PALI', 'FRES', 'LGMK', 'WINT', 'TTNP', 'LIPO', 'TCRT', 'THAR', 
'TOVX', 'ISPC', 'KTTA', 'QNRX', 'ONCO', 'ATXI', 'PTPI', 'RSLS', 'PTIX', 'AZTR', 'SISI', 'ENVB', 'CPHI',
'SXTP', 'STSS', 'ARTL', 'SNPX', 'PCSA', 'HSCS', 'CHEK', 'INM', 'SNOA', 'ATNF', 'XRTX', 'APM', 'SINT'] 
           

# Start and end dates
start_date = "2024-10-08"
end_date = "2024-10-18"

# Initialize empty list to hold the data
data = []

# Loop through tickers
for ticker in tickers:
    result = get_stock_movement(ticker, start_date, end_date)
    if "error" in result:
        print(f"Error for {ticker}: {result['error']}")
    else:
        data.append(result)

# Create DataFrame
celz_comp_df = pd.DataFrame(data)

# Display the DataFrame with the specified columns
celz_comp_df = celz_comp_df[['ticker', '10/8-10/18_peak']].astype({'ticker': str, '10/8-10/18_peak': float})

print(celz_comp_df)


   ticker  10/8-10/18_peak
0    CELZ            91.10
1     UPC           407.61
2    ACON             2.79
3    SCPX            13.16
4    SONN            33.40
..    ...              ...
72   SNOA            28.30
73   ATNF          1186.23
74   XRTX            65.13
75    APM             4.74
76   SINT            14.06

[77 rows x 2 columns]


  celz_comp_df = celz_comp_df[['ticker', '10/8-10/18_peak']].astype({'ticker': str, '10/8-10/18_peak': float})


In [36]:
#For total price movement 10/8 - 10/18
from scipy.stats import mannwhitneyu

just_celz_peak = celz_comp_df.loc[celz_comp_df['ticker'] == 'CELZ', '10/8-10/18_peak'].values[0]

# Extract the values for the rest of the stocks
rest_peaks = celz_comp_df.loc[celz_comp_df['ticker'] != 'CELZ', '10/8-10/18_peak']

##Perplexity version
def hodges_lehmann_interval(just_celz_peak, rest_peaks, alpha=0.05):
    if np.isscalar(just_celz_peak):
        just_celz_peak = [just_celz_peak]
    if np.isscalar(rest_peaks):
        rest_peaks = [rest_peaks]

    pairwise_differences = [x1 - x2 for x1 in just_celz_peak for x2 in rest_peaks]
    pairwise_differences.sort()

    hl_estimator = np.median(pairwise_differences)

    n1, n2 = len(just_celz_peak), len(rest_peaks)
    n = n1 * n2

    # Calculate the critical value
    z = stats.norm.ppf(1 - alpha/2)

    # Calculate the standard error
    se = np.sqrt((n1 + n2 + 1) / (12 * n1 * n2))

    # Calculate the confidence interval
    margin = z * se * np.sqrt(n)
    lower_rank = int(np.ceil((n - margin) / 2) - 1)
    upper_rank = int(np.floor((n + margin) / 2) - 1)

    lower_bound = pairwise_differences[max(0, lower_rank)]
    upper_bound = pairwise_differences[min(n - 1, upper_rank)]

    return {
        "Hodges-Lehmann Estimator": hl_estimator,
        "Confidence Interval": (lower_bound, upper_bound)
    }

# The rest of your code remains the same
u_stat, p_value = stats.mannwhitneyu(just_celz_peak, rest_peaks, alternative='two-sided')

result = hodges_lehmann_interval(just_celz_peak, rest_peaks, alpha=0.05)
print(f'Mann Whitney U Statistics: {u_stat}')
print(f'p value: {p_value:.4f}')
print(f"Hodges-Lehmann Estimator: {result['Hodges-Lehmann Estimator']}")
print(f"Confidence Interval: {result['Confidence Interval']}")


Mann Whitney U Statistics: 71.0
p value: 0.1436
Hodges-Lehmann Estimator: 77.49
Confidence Interval: (np.float64(75.61999999999999), np.float64(78.19999999999999))


In [31]:
from scipy.stats import mannwhitneyu
import numpy as np

def hodges_lehmann_interval(just_celz_peak, rest_peaks, alpha=0.05):
    if np.isscalar(just_celz_peak):
        just_celz_peak = [just_celz_peak]
    if np.isscalar(rest_peaks):
        rest_peaks = [rest_peaks]

    pairwise_differences = [x1 - x2 for x1 in just_celz_peak for x2 in rest_peaks]
    pairwise_differences.sort()

    hl_estimator = np.median(pairwise_differences)

    n = len(pairwise_differences)

    _, u_stat = mannwhitneyu(just_celz_peak, rest_peaks, alternative='two-sided')

    lower_rank = int(max(0, u_stat - 1.96 * np.sqrt(n)))  # Ensure lower rank is >= 0
    upper_rank = int(min(n - 1, u_stat + 1.96 * np.sqrt(n)))  # Ensure upper rank < n

    lower_bound = pairwise_differences[lower_rank]
    upper_bound = pairwise_differences[upper_rank]

    return {
        "Hodges-Lehmann Estimator": hl_estimator,
        "Confidence Interval": (lower_bound, upper_bound)
    }



result = hodges_lehmann_interval(just_celz_peak, rest_peaks, alpha=0.05)

print(f"Hodges-Lehmann Estimator: {result['Hodges-Lehmann Estimator']}")
print(f"Confidence Interval: {result['Confidence Interval']}")


Hodges-Lehmann Estimator: 77.49
Confidence Interval: (np.float64(-1095.13), np.float64(60.72999999999999))
