In [5]:
import pandas as pd

In [6]:
def BinPortfolio(dataframe, all_dataframe_vars=['PERMNO', 'DATE', 'mkt', 'RET_t+1'], vars_to_bin=['mkt'], go_long=[True], number_of_bins=5):
    """Keep the rows that survive a sequential (conditional) quantile sort.

    The frame is first restricted to ``all_dataframe_vars`` and rows with any
    missing value are dropped.  Then, for each variable in ``vars_to_bin``
    (in order), the *remaining* rows are split into ``number_of_bins``
    quantile bins with ``pd.qcut`` and only the highest or lowest bin is
    kept.  Later variables are therefore binned within the survivors of the
    earlier ones.

    Parameters:
        - dataframe (pd.DataFrame): the data to select from; it is not
          modified.
        - all_dataframe_vars (list<string>): columns to keep.  Must include
          every name in ``vars_to_bin``.
        - vars_to_bin (list<string>): columns to sort on, applied in order.
        - go_long (list<bool>): one entry per element of ``vars_to_bin``;
          a truthy entry keeps the highest bin, a falsy entry the lowest.
        - number_of_bins (int): number of quantile bins used at every step.

    Returns:
        - pd.DataFrame with the columns in ``all_dataframe_vars``, containing
          only the selected rows and a fresh 0..n-1 index.

    Raises:
        - ValueError: if ``go_long`` and ``vars_to_bin`` differ in length, or
          if one row or fewer remains before a binning step.  ``pd.qcut``
          itself also raises ValueError when the quantile edges are not
          unique (e.g. many tied values).
    """
    # NOTE: the list defaults above are only ever read, never mutated, so
    # sharing them between calls is harmless.

    if len(go_long) != len(vars_to_bin):
        raise ValueError(f"The length of go_long needs to be the same as the length of vars_to_bin. {len(go_long)} != {len(vars_to_bin)}")

    # Find the subset of the dataframe that we care about
    df = dataframe[all_dataframe_vars].dropna().reset_index(drop=True)

    for bin_var, take_highest in zip(vars_to_bin, go_long):
        if len(df.index) <= 1:
            raise ValueError("There are not enough stocks that satisfy the given requirements")

        # Keep the bin labels in a standalone Series instead of a temporary
        # column: writing a column into (and deleting it from) a filtered
        # frame is chained assignment, and a scratch "<var>_bins" column
        # could also collide with a real column of the same name.
        bin_labels = pd.qcut(df[bin_var], number_of_bins, labels=False)

        # Based on go_long, take the highest or lowest bin
        best_bin = number_of_bins - 1 if take_highest else 0

        df = df[bin_labels == best_bin]

    return df.reset_index(drop=True)

In [7]:
# Load data into pandas dataframe
#df = pd.read_parquet('Data/BoQ_Data.parquet')
#df.head()

In [8]:
# vars_to_bin = ['ME', 'BEME']
# vars_to_bin = ["roic", "BEME", "ME", "p1_mktrf_beta"]
# go_long = [True]*len(vars_to_bin)
# subset = df[df['DATE'] == '1986-05-31']
# portfolio = BinPortfolio(dataframe=subset, vars_to_bin=vars_to_bin, go_long=go_long)
# portfolio