diff --git a/project/likes_preprocessing.py b/project/likes_preprocessing.py
index 5b43c0f..4d5a1ba 100644
--- a/project/likes_preprocessing.py
+++ b/project/likes_preprocessing.py
@@ -1,17 +1,44 @@
 import tensorflow as tf
 import tensorflow_hub as hub
+import pandas as pd
 from typing import *
 
 
-def make_dataset(input_dir: str, userids: List[str]) -> tf.data.Dataset:
-    """Creates the preprocessed text dataset for the given userid's.
+def make_multihot_like_mat(input_dir: str, userids: List[str], COUNT_CUTOFF: int, saveTocsv: bool):
+    """Creates the multi-hot like matrix for the given userids.
 
     Arguments:
-        input_dir {str} -- the parent input directory
-        userids {List[str]} -- the list of userids
+        input_dir {str} -- path to the raw relation CSV file
+        userids {List[str]} -- the list of userids (currently unused; every userid in the CSV is kept)
+        COUNT_CUTOFF {int} -- likes occurring COUNT_CUTOFF times or fewer are dropped
+        saveTocsv {bool} -- if True, also write the matrix to a CSV under PATH
 
     Returns:
-        tf.data.Dataset -- the preprocessed text dataset, where each entry is the feature vector.
+        pd.DataFrame -- multi-hot matrix with one column per kept like_id, indexed by userid
     """
-    # TODO
-    raise NotImplementedError()
+    # Load the raw relation table and drop the CSV's spurious index column
+    df = pd.read_csv(input_dir)
+    df = df.drop(['Unnamed: 0'], axis=1)
+
+    # Keep only likes that occur more than COUNT_CUTOFF times
+    freq_like_id = df["like_id"].value_counts()
+    likes_kept = freq_like_id[freq_like_id > COUNT_CUTOFF]
+    filtered_table = df[df["like_id"].isin(likes_kept.index)]
+
+    # One-hot encode like_id, then sum within each user to get one multi-hot row per userid
+    relHot = pd.get_dummies(filtered_table, columns=["like_id"])
+    relHot = relHot.groupby(['userid']).sum()
+
+    if saveTocsv:
+        # Materialize the userid index as a column so it is written to the CSV
+        relHot.insert(0, "userid", relHot.index)
+
+        # File name pattern: Relation_Multihot_<COUNT_CUTOFF>.csv
+        PATH = "/home/mila/teaching/user07/IsabelleWorkshop/"
+        output_filename = "Relation_Multihot_" + str(COUNT_CUTOFF) + ".csv"
+        relHot.to_csv(PATH + output_filename, index=False, header=True)
+
+        # Drop the helper column again before returning
+        relHot = relHot.drop(["userid"], axis=1)
+
+    return relHot
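
Below is a minimal usage sketch of the new helper (not part of the patch). The import path follows the file's location in the repo; the CSV path, cutoff value, and userid list are hypothetical placeholders.

    from project.likes_preprocessing import make_multihot_like_mat

    # Hypothetical path to the raw relation CSV; substitute the real data file.
    RELATION_CSV = "data/Relation/Relation.csv"

    # Build the multi-hot matrix, dropping likes seen 10 times or fewer,
    # without writing the intermediate CSV to disk.
    relHot = make_multihot_like_mat(
        input_dir=RELATION_CSV,
        userids=[],        # currently unused by the function
        COUNT_CUTOFF=10,
        saveTocsv=False,
    )
    print(relHot.shape)    # (n_users, n_kept_likes)

With saveTocsv=True, the function additionally writes Relation_Multihot_10.csv to the hard-coded PATH, with userid restored as the first column of the file; the returned DataFrame keeps userid in the index either way.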