In [1]:
import pandas as pd
from tomlkit import date
import tc.config as config
import pdb
from tc.log_config import setup_logging
setup_logging()

import logging
# Import Module:

class ImportData:
    def __init__(
        self,
        file_path: str,
        data_format: str,
        date_format: str,
        date_col: str,
        categorical_cols: list,
        numerical_cols: list,
        identifier: str,
        week : bool
    ):
        self.file_path = file_path
        self.data_format = data_format
        self.date_format = date_format
        self.date_col = date_col
        self.categorical_cols = categorical_cols
        self.numerical_cols = numerical_cols
        self.identifier = identifier
        self.week=week

    def read_csv(self):
        try:
            df = pd.read_csv(self.file_path)
        except FileNotFoundError:
            logging.error(f"File not found at {self.file_path}")
            raise FileNotFoundError(f"File not found at {self.file_path}")
        except Exception as e:
            logging.error(f"Unexpected error occurred while reading {self.file_path} - {str(e)}")
            raise Exception(f"Unexpected error occurred: {str(e)}")
        else:
            if self.data_format == "long":
                df = self.pivot_long_to_wide(df)

            logging.info(f"File {self.file_path} imported successfully.")

            return df

    def date_conversion(self, data: pd.DataFrame) -> pd.DataFrame:
        try:
            data = data.sort_values(self.date_col)
            if self.date_format in config.format_mapping:
                format_string = config.format_mapping[self.date_format]
                data[self.date_col] = pd.to_datetime(
                    data[self.date_col], format=format_string
                ).dt.strftime("%b-%Y")
            logging.info(f"Date conversion was successfully.")
            return data
        except ValueError as e:
            logging.error(f"Date conversion error: {e}.")
            raise Exception(f"Error: {e}. Please provide correct date format")


    def pivot_long_to_wide(self, data) -> pd.DataFrame:
        try:
            # Pivot the dataframe
            data = data.filter(
                regex=f"^({self.numerical_cols}|{self.categorical_cols}|{self.date_col})",
                axis=1
            )
            data = self.date_conversion(data=data)
            unique_date_values = list(data[self.date_col].unique())
            data["Date1"] = pd.Categorical(
                data[self.date_col], categories=unique_date_values, ordered=True
            )

            pivot_df = data.pivot_table(
                index=self.identifier, columns="Date1", values=self.numerical_cols # type: ignore
            )

            # Flatten the multi-level column index
            pivot_df.columns = [
                "_".join(col).rstrip("_") for col in pivot_df.columns.values
            ]
            # Reset the index
            pivot_df = pd.DataFrame(pivot_df)
            pivot_df.reset_index(inplace=True)
            merged_data = pd.merge(
                pivot_df,
                data.drop_duplicates(subset=[self.identifier]),
                on=self.identifier,
            )
            logging.info(f"Pivoting was successfully.")
            return merged_data
        except Exception as e:
            logging.error(f"An error occurred during pivoting: {e}")
            print(f"An error occurred:Please upload long format data,Check data for long-wide conversion")
            raise Exception("Pivoting error, check data format and structure.")
            return pd.DataFrame()


# dataset_import = ImportData(
#     file_path="TC_Data_Long.csv",
#     data_format="long",
#     date_format=config.date_format,
#     date_col=config.date_columns,
#     categorical_cols=config.categorical_columns,
#     numerical_cols=config.numerical_columns,
#     identifier=config.hcp_identifier,
#     week=False

# )
# read_data = dataset_import.read_csv()
# print(read_data)
# print(read_data["Date"])

# if read_data is not None:
#     output_path = "C:/Datazymes/Github/TestandControl-Python/apps/tc_python/TC_Data_Transformed.csv"
#     read_data.to_csv(output_path, index=False)
#     print(f"Data exported to {output_path}")
# else:
#     print("No data to export.")

In [None]:
from tc import config

In [None]:
import tc.config as config

In [5]:
#main.py file

import pandas as pd
import copy
import tc.config as config
from tc.data_import import ImportData
from tc.distance_calc import DistanceCalculation
from tc.results_page import ResultCalculations ,calculate_lift




In [2]:
#Import Page module

dataset_import = ImportData(
    file_path="MMX_Data_PDE_Reduced.csv",
    data_format="long",
    date_format="dd-mm-yyyy",
    date_col=config.date_columns,
    categorical_cols=config.categorical_columns,
    numerical_cols=config.numerical_columns,
    identifier='ID',
    week=False

)

In [3]:
read_data = dataset_import.read_csv()

  pivot_df = data.pivot_table(


In [4]:
read_data.head()

Unnamed: 0,ID,Delivered_Doximity_Jan-2020,Delivered_Doximity_Feb-2020,Delivered_Doximity_Mar-2020,Delivered_Doximity_Apr-2020,Delivered_Doximity_May-2020,Delivered_Doximity_Jun-2020,Delivered_Doximity_Jul-2019,Delivered_Doximity_Aug-2019,Delivered_Doximity_Sep-2019,...,SpeakerEvent,Month,Cluster,TV_GRPs,Display_Impressions,Display_Clicks,TV2_GRPs,Display2_Impressions,Display2_Clicks,Date1
0,14934,3.0,2.0,4.0,2.0,2.0,2.0,0.0,1.0,0.0,...,0,Jan-2020,5,441,650,134,347,475,159,Jan-2020
1,14935,6.0,5.0,4.0,5.0,5.0,10.0,2.0,4.0,1.0,...,1,Jan-2020,7,751,674,694,232,106,494,Jan-2020
2,14936,6.0,8.0,12.0,5.0,8.0,8.0,4.0,7.0,1.0,...,0,Jan-2020,8,125,837,623,133,772,677,Jan-2020
3,14937,8.0,2.0,9.0,4.0,6.0,4.0,2.0,3.0,1.0,...,0,Jan-2020,9,553,285,230,66,973,778,Jan-2020
4,14938,4.0,3.0,12.0,10.0,11.0,5.0,2.0,7.0,1.0,...,2,Jan-2020,17,90,229,752,155,164,357,Jan-2020
