## CSV upload

In [2]:
import pandas as pd
import csv

def _to_numeric_if_possible(column):
    """Return `column` converted by pd.to_numeric, or unchanged if that fails."""
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        # At least one value is not numeric: keep the column exactly as read.
        return column


def upload_and_read_csv():
    """Prompt for a CSV path and load that file into a pandas DataFrame.

    The first CSV row supplies the column names and every following row
    becomes a data row. Each column is converted with ``pd.to_numeric`` when
    the whole column parses as numbers; otherwise it is left as read.

    Side effect: on a successful load the pandas option ``display.width``
    is set to 1000.

    Returns:
        pandas.DataFrame | None: the loaded frame, or ``None`` when the file
        is missing, empty, or cannot be loaded (a message is printed in each
        of those cases).
    """
    # Obtain the CSV file path from the user.
    file_path = input("Input data path: ")

    try:
        # newline='' is what the csv module expects, so that line breaks
        # inside quoted fields are parsed by the reader itself.
        # utf-8-sig drops a leading byte-order mark (otherwise it would end
        # up inside the first column name) and still reads plain UTF-8.
        with open(file_path, 'r', encoding='utf-8-sig', newline='') as file:
            data_list = list(csv.reader(file))

        if not data_list:
            # No header row at all: report it explicitly instead of letting
            # data_list[0] fail with "list index out of range".
            print(f"An error occurred when loading data: CSV file is empty: {file_path}")
            return None

        # The first row of the CSV is the header, which we use for column names.
        header, rows = data_list[0], data_list[1:]
        df = pd.DataFrame(rows, columns=header)
        # Convert columns to numeric dtypes where possible.
        df = df.apply(_to_numeric_if_possible)
        pd.set_option('display.width', 1000)
        return df

    except FileNotFoundError:
        print(f"Can't find CSV: {file_path}")
        return None
    except Exception as e:
        # Best-effort loader: report any other failure and signal it with None.
        print(f"An error occurred when loading data: {e}")
        return None

if __name__ == "__main__":
    # Run the interactive loader; `df` is bound at module level so that the
    # cells further down can use it.
    df = upload_and_read_csv()
    # The loader returns None on failure, so only preview a real frame.
    if df is not None:
        # Preview the first five rows (the default size of head()).
        print(df.head(5))


         date ad group  ad  Impression  Clicks  Views  Buy     CTR     VTR     BTR  CPV  CPB  ad Cost  product Cost  product price    ROAS
0  2023-01-01        A   1       40582     228   7071   13  0.0056  0.1742  0.0570   25  445     5786           500            700  0.6826
1  2023-01-01        A   2       42647     180   7152    8  0.0042  0.1677  0.0444   39  869     6951           500            700  0.7808
2  2023-01-01        B   1       48606     202   7363   14  0.0042  0.1515  0.0693   36  526     7357           500            700  0.7075
3  2023-01-01        B   2       47853     283   7793   11  0.0059  0.1629  0.0389   25  646     7102           500            700  0.7381
4  2023-01-01        C   1       56271     282   8021   14  0.0050  0.1425  0.0496   28  557     7804           500            700  0.7163


### Train / validation / test split

In [6]:
# Shuffle the row positions of `df` reproducibly and carve them into
# training / validation / test index lists. The two hold-out sets each get
# 20% of the rows (rounded down); training gets everything that is left.
import random

n_rows = df.shape[0]
index = n_rows // 5  # size of each hold-out set: 20% of the rows, rounded down
random.seed(697)  # fixed seed so the shuffle is identical on every run
indices = list(range(n_rows))
random.shuffle(indices)

# Slice with non-negative boundaries. The negative form (indices[:-index*2],
# indices[-index:]) breaks when index == 0, i.e. for fewer than 5 rows:
# indices[:-0] is empty and indices[-0:] is the whole list, so training would
# be empty and every row would land in the test set. For index > 0 the
# boundaries below select exactly the same elements as the negative slices.
validation_start = n_rows - 2 * index
test_start = n_rows - index

training_indices = indices[:validation_start]
validation_indices = indices[validation_start:test_start]
test_indices = indices[test_start:]

# Show the first few shuffled training positions as the cell output.
training_indices[:10]

[40, 138, 52, 23, 22, 103, 176, 29, 38, 34]