In [None]:
!pip install sklearn



In [None]:
import json
import os
from typing import List, Optional

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tqdm import tqdm

In [None]:
def update_df(df: pd.DataFrame, classifier: str, accuracy: float, split_size: float = 0.5, remarks: Optional[List[str]] = None) -> pd.DataFrame:
  """Return a new results table consisting of ``df`` plus one appended row.

  Args:
    df: Existing results table; it is not modified.
    classifier: Value stored in the 'Classifier' column.
    accuracy: Value stored in the 'Accuracy' column.
    split_size: Value stored in the 'split_size' column.
    remarks: List stored in the 'Remarks' column; ``None`` stores a fresh empty list.

  Returns:
    A new DataFrame whose index is renumbered from 0.
  """
  # Build a fresh list per call: a mutable default ([]) would be one shared object
  # referenced by every row that relied on the default.
  row_remarks = [] if remarks is None else remarks
  new_row = {'Classifier': classifier, 'Accuracy': accuracy, 'split_size': split_size, 'Remarks': row_remarks}
  row_df = pd.DataFrame([new_row])

  if len(df) == 0:
    # Nothing to concatenate yet (concat with an empty frame is deprecated in recent
    # pandas); keep df's column order and add any columns only the new row has.
    extra_cols = [col for col in row_df.columns if col not in df.columns]
    return row_df.reindex(columns=[*df.columns, *extra_cols])

  # DataFrame.append was removed in pandas 2.0; pd.concat is the supported replacement.
  return pd.concat([df, row_df], ignore_index=True)

In [None]:
# Start from an empty results table and log three example classifier runs.
df = pd.DataFrame(columns=["Classifier", "Accuracy", "split_size", "Remarks"])
df = update_df(df, classifier="KNN", accuracy=76, split_size=0.1)
# No split_size given here, so the function's default is used.
df = update_df(df, classifier="SVM", accuracy=99, remarks=["Check again"])
df = update_df(df, classifier="LR", accuracy=65, split_size=0.7, remarks=["param tuning", "overfitting"])
df

Unnamed: 0,Classifier,Accuracy,split_size,Remarks
0,KNN,76,0.1,[]
1,SVM,99,0.5,[Check again]
2,LR,65,0.7,"[param tuning, overfitting]"


In [None]:
def count_files_in_dir(project_root_dir, *fpaths: str):
  """Print, for each given sub-path, how many entries its directory holds.

  Args:
    project_root_dir: Directory that every sub-path is joined onto.
    *fpaths: Any number of sub-paths below ``project_root_dir``.

  Each line is printed as ``<sub-path> : <count>``. The count comes from
  ``os.listdir``, so sub-directories are counted along with files.
  """
  for sub_dir in fpaths:
    entries = os.listdir(os.path.join(project_root_dir, sub_dir))
    print(sub_dir, ":", len(entries))

In [None]:
# Each name after the root directory is collected into the function's *fpaths tuple.
count_files_in_dir("../usr", *("games", "include", "local", "bin"))

games : 0
include : 339
local : 18
bin : 968


In [None]:
def custom_train_test_split(classifier: str, y: List, *X: List, stratify=None, **split_args):
  """Split one or more feature arrays together with their labels and print the result.

  Demonstrates ``*args`` / keyword-only / ``**kwargs`` handling around
  ``train_test_split``. Nothing is returned; all results are printed.

  Args:
    classifier: Name of the classifier; only printed.
    y: Labels, passed to ``train_test_split`` after all arrays in ``X``.
    *X: One or more feature arrays. The first is reported as trainx/testx,
      any further ones as the "synthetic" train/test pairs.
    stratify: Keyword-only; forwarded as ``train_test_split(stratify=...)``.
      Defaults to ``None``.
    **split_args: Any further keyword arguments, forwarded unchanged to
      ``train_test_split`` (e.g. ``train_size``, ``random_state``).

  Raises:
    ValueError: If no feature array is passed in ``X``.
  """
  # Without any X the split returns only the two label parts and the unpacking
  # below would fail with an unhelpful "not enough values to unpack" error.
  if not X:
    raise ValueError("custom_train_test_split needs at least one feature array in *X")

  print("Classifier used: ", classifier)
  print("Keys:", split_args.keys())
  print("Values: ", split_args.values())
  print(X)
  print(y)
  print("Length of passed keyword arguments: ", len(split_args))

  # train_test_split returns a (train, test) pair per input array, in input order:
  # first pair -> first X array, last pair -> y, everything in between -> remaining X arrays.
  trainx, testx, *synthetic, trainy, testy = train_test_split(*X,
                                                    y,
                                                    stratify = stratify,
                                                    **split_args)

  print("trainx: ", trainx, "trainy: ",trainy, '\n',  "testx: ", testx, "testy:", testy)
  print("synthetic train and test: ", *synthetic)

In [None]:
# Six toy samples, their labels (two of each class) and a parallel "synthetic" array.
ims = list(range(1, 7))
labels = ['a', 'b', 'c'] * 2
synthetic_ims = [10 * im for im in ims]
split_size = 0.6
seed = 50
# stratify binds to the keyword-only parameter; train_size and random_state
# are collected into **split_args in the order given here.
custom_train_test_split(
  "SVM", labels, ims, synthetic_ims,
  stratify = labels,
  train_size = split_size,
  random_state = seed,
)

Classifier used:  SVM
Keys: dict_keys(['train_size', 'random_state'])
Values:  dict_values([0.6, 50])
([1, 2, 3, 4, 5, 6], [10, 20, 30, 40, 50, 60])
['a', 'b', 'c', 'a', 'b', 'c']
Length of passed keyword arguments:  2
trainx:  [4, 3, 5] trainy:  ['a', 'c', 'b'] 
 testx:  [2, 1, 6] testy: ['b', 'a', 'c']
synthetic train and test:  [40, 30, 50] [20, 10, 60]


In [None]:
# Example of kwargs

def print_results(**results):
  """Print every keyword argument as ``<name> <value>``, one per line, in the order passed."""
  for name in results:
    print(name, results[name])


In [None]:
# Every name=value pair below ends up as one entry of the function's **results dict.
print_results(
  clf = "SVM",
  score = 98.2,
  time_taken = 1.28,
  split_size = 0.8,
  tuning = False,
)

clf SVM
score 98.2
time_taken 1.28
split_size 0.8
tuning False


In [None]:
# Two small dictionaries with disjoint keys.
dict1 = dict(a=2, b=20)
dict2 = dict(c=15, d=40)
# ** unpacks each dictionary's items into the new literal, left to right.
merged_dict = {
  **dict1,
  **dict2,
}

In [None]:
# Bare last expression: the notebook displays the merged dictionary as the cell output.
merged_dict

{'a': 2, 'b': 20, 'c': 15, 'd': 40}