# Imports

In [1]:
import pandas as pd
import os

# Methods

In [None]:
def get_root_dir_path(curr_working_dir, no_folders_above):
    """Return the ancestor directory a given number of levels above a path.

    Parameters
    ----------
    curr_working_dir : str
        Path to start from.
    no_folders_above : int
        How many times ``os.path.dirname`` is applied to the starting path.

    Returns
    -------
    str
        The resulting ancestor path. Both the starting path and the result
        are also printed.
    """
    ancestor = curr_working_dir
    # Each pass strips the last path component, i.e. moves one level up.
    for _ in range(no_folders_above):
        ancestor = os.path.dirname(ancestor)

    summary = (f"Initial directory: {curr_working_dir}\n"
               f"Root directory: {ancestor}.")
    print(summary)
    return ancestor

def load_data_pickle_csv(data_to_load):
    """Load a results pickle and a domain CSV from extension-less paths.

    Parameters
    ----------
    data_to_load : sequence of str
        ``data_to_load[0]`` is the path of the pickle file without its
        ``.pkl`` extension; ``data_to_load[1]`` is the path of the CSV file
        without its ``.csv`` extension.

    Returns
    -------
    tuple
        ``(results_df, base_domain_df)``: the object read from the pickle and
        the DataFrame read from the CSV.

    Raises
    ------
    KeyError
        If the CSV has no ``user_id`` column (needed for the summary line).
    """
    # Build each path once so the log message and the reads cannot disagree.
    pickle_path = data_to_load[0] + ".pkl"
    csv_path = data_to_load[1] + ".csv"

    print(f"The following files will be loaded:\n"
          f"- {pickle_path}\n"
          f"- {csv_path}\n"
          f"===================\nLoading datasets.."
          )

    # NOTE(security): unpickling can execute arbitrary code. Only load pickle
    # files that were produced by this project / a trusted source.
    results_df = pd.read_pickle(pickle_path)
    base_domain_df = pd.read_csv(csv_path)

    # dropna=False keeps the count identical to len(series.unique()).
    unique_user_count = base_domain_df['user_id'].nunique(dropna=False)
    print(f"- No. Unique Users in Target Domain: {unique_user_count}\n"
          f"Datasets loaded!\n===================")

    return results_df, base_domain_df

# Useful Variables

In [2]:
# Column name -> dtype mapping. Not referenced by the cells visible here;
# presumably intended for pd.read_csv(dtype=...) -- TODO confirm.
COLUMNS_TYPES = {
    'user_id': 'string',
    'item_id': 'string',
    'rating': 'int',
    'timestamp': 'string',
    'title': 'string',
    'brand': 'string',
    'category': 'string',
}

# Project root: two directory levels above the notebook's working directory.
# Requires the "Methods" cell (get_root_dir_path) to have been run first.
ROOT_DIR = get_root_dir_path(os.getcwd(), 2)

# Every *_DIR below that is used as a prefix for a file name is built with a
# trailing '' component so that it ends with a path separator: later cells
# build file paths with plain string concatenation (DIR + file_name).

# ==================== DATA DIRECTORIES ==================== #
EXTRA_DATA_DIR = os.path.join(ROOT_DIR, 'data', 'processed', 'extra_cut', '')

EXTRA_BOOKS_DIR = os.path.join(EXTRA_DATA_DIR, 'Books', '')
EXTRA_CDS_DIR = os.path.join(EXTRA_DATA_DIR, 'CDs', '')
EXTRA_MOVIES_DIR = os.path.join(EXTRA_DATA_DIR, 'Movies', '')
EXTRA_TEMP_DIR = os.path.join(EXTRA_DATA_DIR, 'Temp', '')

# ==================== PREDICTION_5_10 DIRECTORIES ==================== #
PREDICTIONS_DIR = os.path.join(ROOT_DIR, 'models', 'predictions', '')

BOOKS35_CDS510_DIR = os.path.join(PREDICTIONS_DIR, 'books35_cds510', '')
BOOKS35_MOVIES510_DIR = os.path.join(PREDICTIONS_DIR, 'books35_movies510', '')

CDS35_BOOKS510_DIR = os.path.join(PREDICTIONS_DIR, 'cds35_books510', '')
CDS35_MOVIES510_DIR = os.path.join(PREDICTIONS_DIR, 'cds35_movies510', '')

MOVIES35_BOOKS510_DIR = os.path.join(PREDICTIONS_DIR, 'movies35_books510', '')
MOVIES35_CDS510_DIR = os.path.join(PREDICTIONS_DIR, 'movies35_cds510', '')

# ==================== PREDICTION_8_10 DIRECTORIES ==================== #
# Kept without a trailing separator, as in the original definition.
PREDICTIONS_8_10_DIR = os.path.join(ROOT_DIR, 'models', 'predictions', 'cut_8_10')

BOOKS35_CDS810_DIR = os.path.join(PREDICTIONS_8_10_DIR, 'books35_cds810', '')
BOOKS35_MOVIES810_DIR = os.path.join(PREDICTIONS_8_10_DIR, 'books35_movies810', '')

CDS35_BOOKS810_DIR = os.path.join(PREDICTIONS_8_10_DIR, 'cds35_books810', '')
CDS35_MOVIES810_DIR = os.path.join(PREDICTIONS_8_10_DIR, 'cds35_movies810', '')

MOVIES35_BOOKS810_DIR = os.path.join(PREDICTIONS_8_10_DIR, 'movies35_books810', '')
MOVIES35_CDS810_DIR = os.path.join(PREDICTIONS_8_10_DIR, 'movies35_cds810', '')

NameError: name 'get_root_dir_path' is not defined

In [None]:
# One row per (source domain, target domain) pair:
#   (source, target, directory of the 5-10 predictions, directory of the
#    source domain's extra-cut data that holds the "<target>_8_10" CSV).
_SCENARIO_INPUT_DIRS = (
    ("books", "cds", BOOKS35_CDS510_DIR, EXTRA_BOOKS_DIR),
    ("books", "movies", BOOKS35_MOVIES510_DIR, EXTRA_BOOKS_DIR),
    ("cds", "books", CDS35_BOOKS510_DIR, EXTRA_CDS_DIR),
    ("cds", "movies", CDS35_MOVIES510_DIR, EXTRA_CDS_DIR),
    ("movies", "books", MOVIES35_BOOKS510_DIR, EXTRA_MOVIES_DIR),
    ("movies", "cds", MOVIES35_CDS510_DIR, EXTRA_MOVIES_DIR),
)

# Suffixes that distinguish the two result files of every pair.
_SHOT_SUFFIXES = ("0s", "1s")

# scenario name -> [extension-less path of the predictions pickle,
#                   extension-less path of the 8-10 CSV].
# Generated from the table above so the twelve entries cannot drift apart.
PICKLE_DICT = {
    f"{source}35_{target}810_{shots}": [
        f"{predictions_dir}{source}35_{target}510_GPT_{shots}",
        f"{extra_dir}{target}_8_10",
    ]
    for source, target, predictions_dir, extra_dir in _SCENARIO_INPUT_DIRS
    for shots in _SHOT_SUFFIXES
}

# Scenario names in processing order (dicts preserve insertion order):
#   0,1   books35_cds810_0s,    books35_cds810_1s
#   2,3   books35_movies810_0s, books35_movies810_1s
#   4,5   cds35_books810_0s,    cds35_books810_1s
#   6,7   cds35_movies810_0s,   cds35_movies810_1s
#   8,9   movies35_books810_0s, movies35_books810_1s
#   10,11 movies35_cds810_0s,   movies35_cds810_1s
all_scenario = list(PICKLE_DICT)

# Data Extraction

In [None]:
def extract_8_10_results(scenario_to_use, path_to_store_pickle):
    """Keep a scenario's result rows for the users of its CSV and pickle them.

    The scenario's input paths are looked up in ``PICKLE_DICT`` and loaded
    with ``load_data_pickle_csv``. Rows of the results frame whose ``UserId``
    appears in the ``user_id`` column of the CSV frame are kept and written
    to ``path_to_store_pickle``.

    Parameters
    ----------
    scenario_to_use : str
        Key of ``PICKLE_DICT`` identifying the scenario.
    path_to_store_pickle : str or os.PathLike
        Destination of the filtered results pickle. Missing parent
        directories are created.

    Returns
    -------
    None

    Raises
    ------
    KeyError
        If ``scenario_to_use`` is not a key of ``PICKLE_DICT``, or if one of
        the expected columns (``UserId`` / ``user_id``) is missing.
    """
    # Paths of the scenario's data
    scenario_datasets = PICKLE_DICT[scenario_to_use]

    # Load the scenario's data
    res_df, target_dom_df = load_data_pickle_csv(scenario_datasets)

    # Unique user ids present in the CSV data
    target_user_ids = target_dom_df['user_id'].unique()

    # Result rows that belong to those users
    filtered_results = res_df[res_df['UserId'].isin(target_user_ids)]

    # to_pickle does not create directories; make sure the destination exists
    # so a fresh checkout does not fail after the (expensive) load step.
    if isinstance(path_to_store_pickle, (str, os.PathLike)):
        output_dir = os.path.dirname(path_to_store_pickle)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

    # Store the filtered dataframe
    filtered_results.to_pickle(path_to_store_pickle)
    print(f"{scenario_to_use} results have been stored!\n=======================\n")

In [None]:
# Output directory of every scenario family; a scenario name is
# "<family>_<shots>", e.g. "books35_cds810_0s" -> family "books35_cds810".
RESULTS_DIR_BY_FAMILY = {
    "books35_cds810": BOOKS35_CDS810_DIR,
    "books35_movies810": BOOKS35_MOVIES810_DIR,
    "cds35_books810": CDS35_BOOKS810_DIR,
    "cds35_movies810": CDS35_MOVIES810_DIR,
    "movies35_books810": MOVIES35_BOOKS810_DIR,
    "movies35_cds810": MOVIES35_CDS810_DIR,
}

for scenario in all_scenario:
    # Drop the trailing "_0s" / "_1s" to find the family's output directory.
    scenario_family = scenario.rsplit("_", 1)[0]

    # Fail loudly on an unknown scenario instead of reusing the path of the
    # previous iteration (which would overwrite that scenario's output).
    if scenario_family not in RESULTS_DIR_BY_FAMILY:
        raise ValueError(f"No output directory configured for scenario '{scenario}'")

    results_path = RESULTS_DIR_BY_FAMILY[scenario_family] + scenario + ".pkl"
    extract_8_10_results(scenario, results_path)