In [1]:
import sqlite3
# Shared connection to the on-disk task database, opened once at import time.
# check_same_thread=False turns off sqlite3's check that the connection is only
# used from the thread that created it.
conn = sqlite3.connect('tasks.db', check_same_thread=False)

# Single cursor reused by every helper function in this module.
c = conn.cursor()

def create_table():
    """Create the ``tasks`` table if it does not exist yet.

    The schema includes ``deadline_date``, which ``add_row`` inserts and
    ``get_today_tasks`` filters on. A ``tasks`` table that already exists
    without that column is upgraded in place with ``ALTER TABLE``, because
    ``CREATE TABLE IF NOT EXISTS`` leaves an existing table untouched.

    Uses the module-level cursor ``c``. Returns ``None``.
    """
    c.execute('CREATE TABLE IF NOT EXISTS tasks ( title TEXT, tag TEXT, deadline DATETIME, deadline_date DATE, about TEXT, task_status TEXT, time_estimation TEXT, start_time DATETIME, end_time DATETIME, deadline_met BOOL )')

    # PRAGMA table_info yields one row per column; field 1 is the column name.
    existing_columns = {row[1] for row in c.execute('PRAGMA table_info(tasks)').fetchall()}
    if 'deadline_date' not in existing_columns:
        # Table was created with the older schema: add the missing column.
        c.execute('ALTER TABLE tasks ADD COLUMN deadline_date DATE')

def add_row(title, tag, deadline, deadline_date, about, task_status, time_estimation, start_time, end_time, deadline_met):
    """Insert one task into the ``tasks`` table and commit.

    Each argument is bound, in order, to the column of the same name through
    ``?`` placeholders. Uses the module-level cursor ``c`` and connection
    ``conn``. Returns ``None``.
    """
    insert_sql = 'INSERT INTO tasks(title, tag, deadline, deadline_date, about, task_status, time_estimation, start_time, end_time, deadline_met) VALUES (?,?,?,?,?,?,?,?,?,?)'
    row_values = (
        title,
        tag,
        deadline,
        deadline_date,
        about,
        task_status,
        time_estimation,
        start_time,
        end_time,
        deadline_met,
    )
    c.execute(insert_sql, row_values)
    # Persist the new row immediately.
    conn.commit()
    
def view_all_data():
    """Fetch every row currently stored in the ``tasks`` table.

    Runs on the module-level cursor ``c`` and returns the list produced by
    its ``fetchall()``.
    """
    # execute() returns the cursor itself, so the fetch can be chained.
    return c.execute('SELECT * FROM tasks').fetchall()

def view_all_task_names():
    """List the distinct task titles stored in the ``tasks`` table.

    Returns the rows from ``fetchall()`` on the module-level cursor ``c``;
    each row holds a single ``title`` value.
    """
    distinct_titles_sql = 'SELECT DISTINCT title FROM tasks'
    c.execute(distinct_titles_sql)
    title_rows = c.fetchall()
    return title_rows
              
def get_task(task):
    """Return every row of ``tasks`` whose ``title`` equals ``task``.

    The title is bound as a ``?`` parameter rather than formatted into the
    SQL text. This removes the SQL-injection risk, handles titles that
    contain quotes, and stops SQLite from treating a value such as ``title``
    as a double-quoted column name (which made every row match).

    Args:
        task: the title to look up.

    Returns:
        The list from ``fetchall()`` on the module-level cursor ``c``;
        empty when no row matches.
    """
    c.execute('SELECT * FROM tasks WHERE title=?', (task,))
    return c.fetchall()

def get_today_tasks(deadline_date):
    """Return every row of ``tasks`` whose ``deadline_date`` equals the argument.

    The value is bound as a ``?`` parameter instead of being formatted into
    the SQL text, so it cannot alter the statement and cannot be mistaken by
    SQLite for a double-quoted column name.

    Args:
        deadline_date: the date value to match against the ``deadline_date``
            column.

    Returns:
        The list from ``fetchall()`` on the module-level cursor ``c``;
        empty when no row matches.
    """
    c.execute('SELECT * FROM tasks WHERE deadline_date=?', (deadline_date,))
    return c.fetchall()
              
def get_task_by_status(task_status):
    """Return every row of ``tasks`` whose ``task_status`` equals the argument.

    Two fixes over the previous version: the fetched rows are now actually
    returned (the function used to fall off the end and yield ``None``), and
    the status is bound as a ``?`` parameter instead of being formatted into
    the SQL text.

    Args:
        task_status: the status value to match.

    Returns:
        The list from ``fetchall()`` on the module-level cursor ``c``;
        empty when no row matches.
    """
    c.execute('SELECT * FROM tasks WHERE task_status=?', (task_status,))
    return c.fetchall()
              
# def edit_task_data(new_task,new_task_status,new_task_date,task,task_status,task_due_date):
#     c.execute("UPDATE taskstable SET task =?,task_status=?,task_due_date=? WHERE task=? and task_status=? and task_due_date=? ",(new_task,new_task_status,new_task_date,task,task_status,task_due_date))
#     conn.commit()
#     data = c.fetchall()
#     return data
              
def delete_data(task):
    """Delete every row of ``tasks`` whose ``title`` equals ``task`` and commit.

    The title is bound as a ``?`` parameter rather than formatted into the
    SQL text. With the old double-quoted formatting, a crafted title, or
    simply a title equal to a column name such as ``title``, could delete
    every row in the table.

    Args:
        task: the title of the task(s) to remove.

    Uses the module-level cursor ``c`` and connection ``conn``. Returns
    ``None``.
    """
    c.execute('DELETE FROM tasks WHERE title=?', (task,))
    conn.commit()



#  ****************************************************
#  ML Model Functions   
#  ****************************************************

def get_all_ml_data(filename):
    """Return every row of the table named by ``filename``.

    Despite the parameter name, the value is used as a table name in
    ``SELECT * FROM <name>``. Table names cannot be bound as ``?``
    parameters, so the name is quoted as an SQL identifier. This stops extra
    SQL from being appended to the statement and also allows names that
    contain spaces or are SQL keywords. A dotted name such as
    ``main.dataML`` is still read as ``schema.table``.

    Args:
        filename: table name, optionally schema-qualified with a dot.

    Returns:
        The list from ``fetchall()`` on the module-level cursor ``c``.

    Raises:
        sqlite3.OperationalError: if no such table exists.
    """
    # Quote each dotted part separately; embedded double quotes are doubled.
    quoted_name = '.'.join(
        '"' + part.strip().replace('"', '""') + '"'
        for part in filename.split('.')
    )
    c.execute('SELECT * FROM ' + quoted_name)
    return c.fetchall()

In [3]:
import pandas as pd 
from db_funcs import *
from sklearn.ensemble import RandomForestClassifier


In [29]:
# Columns common to both ML tables; the training table additionally has the label.
ml_input_columns = [
    "index",
    "total_days_from_input",
    "amount_days_late",
    "avg_mood_int",
    "avg_sleep_hours_int",
    "medication_taken",
    "chore_type",
    "count_times_late_in_this_chore",
]

# Labelled rows from the 'dataML' table.
train = get_all_ml_data('dataML')
train_df = pd.DataFrame(train, columns=ml_input_columns + ["is_postponed"])

# Rows to score from the 'dataMLCopy' table, which has no 'is_postponed' column.
data = get_all_ml_data('dataMLCopy')
data_df = pd.DataFrame(data, columns=ml_input_columns)



In [30]:
# 'train_df' is the labelled DataFrame built in the previous cell.
# 'is_postponed' is treated as a binary column indicating whether a chore was postponed (1) or not (0).

    # Preparing the feature set and target variable
# Model inputs: every training column except "index" and the label.
feature_names = [
    "total_days_from_input",
    "amount_days_late",
    "avg_mood_int",
    "avg_sleep_hours_int",
    "medication_taken",
    "chore_type",
    "count_times_late_in_this_chore",
]
X = train_df[feature_names]
# Target: the 'is_postponed' label column.
y = train_df['is_postponed']

# Fit a 100-tree random forest on all of train_df (no hold-out split), with a fixed seed.
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X, y)


RandomForestClassifier(random_state=42)

In [31]:
# Display the training feature matrix for inspection.
X

Unnamed: 0,total_days_from_input,amount_days_late,avg_mood_int,avg_sleep_hours_int,medication_taken,chore_type,count_times_late_in_this_chore
0,7,0,5,5,1,0,22
1,4,5,4,5,1,0,22
2,8,0,3,6,0,0,22
3,5,2,4,7,0,0,22
4,7,0,5,6,0,0,22
...,...,...,...,...,...,...,...
80,7,11,6,7,1,2,20
81,2,12,5,6,0,2,20
82,1,12,5,4,0,2,20
83,4,2,5,8,0,2,20


In [32]:
# Select the model's feature columns from data_df (the "index" column is left out).
# .copy() makes pred_data_df an independent frame, so adding a column to it later
# does not trigger pandas' SettingWithCopyWarning and can never touch data_df.
pred_data_df = data_df[["total_days_from_input", "amount_days_late", "avg_mood_int", "avg_sleep_hours_int", "medication_taken", "chore_type", "count_times_late_in_this_chore"]].copy()
pred_data_df.head()

Unnamed: 0,total_days_from_input,amount_days_late,avg_mood_int,avg_sleep_hours_int,medication_taken,chore_type,count_times_late_in_this_chore
0,7,0,5,5,1,0,22
1,4,5,4,5,1,0,22
2,8,0,3,6,0,0,22
3,5,2,4,7,0,0,22
4,7,0,5,6,0,0,22


In [33]:
# Predict the probability of postponement for every row of pred_data_df,
# which must contain the same feature columns as X (7 in this case).
# Score only the feature columns. Dropping 'probability_postponed' (present if
# this cell already ran once) keeps the cell re-runnable: predict_proba must be
# given exactly the columns the classifier was fitted on.
feature_frame = pred_data_df.drop(columns=['probability_postponed'], errors='ignore')
probabilities = clf.predict_proba(feature_frame)

# Column 1 of predict_proba is taken as the "postponed" class.
# NOTE(review): this assumes clf.classes_ is [0, 1] — confirm against the training labels.
postponed_probabilities = probabilities[:, 1]

# Rebind pred_data_df to a new frame instead of assigning a column in place:
# pred_data_df was made by selecting columns from data_df, and in-place
# assignment on such a selection can raise pandas' SettingWithCopyWarning.
pred_data_df = feature_frame.assign(probability_postponed=postponed_probabilities)

# Display the first few rows to verify
pred_data_df.head()

Unnamed: 0,total_days_from_input,amount_days_late,avg_mood_int,avg_sleep_hours_int,medication_taken,chore_type,count_times_late_in_this_chore,probability_postponed
0,7,0,5,5,1,0,22,0.93
1,4,5,4,5,1,0,22,0.95
2,8,0,3,6,0,0,22,0.0
3,5,2,4,7,0,0,22,0.01
4,7,0,5,6,0,0,22,0.0


In [35]:
# Row label of the largest predicted postponement probability.
postponement_scores = pred_data_df['probability_postponed']
highest_prob_index = postponement_scores.idxmax()

# Report the label that was found.
print(f"Index of the row with the highest probability of being postponed: {highest_prob_index}")

Index of the row with the highest probability of being postponed: 1


In [34]:
# Display the prediction input frame, including its "index" column.
data_df

Unnamed: 0,index,total_days_from_input,amount_days_late,avg_mood_int,avg_sleep_hours_int,medication_taken,chore_type,count_times_late_in_this_chore
0,0,7,0,5,5,1,0,22
1,1,4,5,4,5,1,0,22
2,2,8,0,3,6,0,0,22
3,3,5,2,4,7,0,0,22
4,4,7,0,5,6,0,0,22
5,5,3,1,5,6,0,0,22
6,6,7,1,5,7,1,0,22
7,7,8,0,4,7,1,0,22
8,8,5,6,7,6,0,0,22
9,9,4,3,3,6,0,0,22
