In [9]:
import pandas as pd
from datetime import datetime
import math
from pymongo import MongoClient

class ClickThrough:
    """Report how often clicks happen close to where the user is looking.

    On construction the ``GazeData``, ``ClickData`` and ``MetaData``
    collections of the ``INTUX`` database on the local MongoDB server are
    loaded into pandas DataFrames.  ``Analyse`` then prints, for every
    ``rollNo`` found in the meta data, how many clicks had at least one gaze
    sample near them in both time and screen position, followed by the
    overall hit ratio.
    """

    def __init__(self):
        """Connect to MongoDB and load the three collections into DataFrames."""
        # Not updated by any method of this class; kept so existing readers
        # of the attribute keep working.
        self.total_distance = 0
        # rollNo -> {"total_clicks": ..., "clicks_when_looking": ...}
        self.user_click_counts = {}

        # Totals over all users, filled in by Analyse().
        self.hit_click = 0
        self.total_click = 0

        self.client = MongoClient("mongodb://localhost:27017")
        self.db = self.client["INTUX"]

        # Materialise the cursors up front so the DataFrames do not depend on
        # the connection afterwards.
        gaze_data = list(self.db["GazeData"].find())
        click_data = list(self.db["ClickData"].find())
        meta_data = list(self.db["MetaData"].find())

        self.gaze_df = pd.DataFrame(gaze_data)
        self.click_df = pd.DataFrame(click_data)
        self.meta_df = pd.DataFrame(meta_data)

    def preprocess(self):
        """Drop the ``_id`` column from the three DataFrames.

        This is optional and is not called by ``Analyse``.  Frames without an
        ``_id`` column (for example an empty collection, or a second call)
        are left unchanged instead of raising ``KeyError``.
        """
        self.gaze_df = self.gaze_df.drop(columns=["_id"], errors="ignore")
        self.click_df = self.click_df.drop(columns=["_id"], errors="ignore")
        self.meta_df = self.meta_df.drop(columns=["_id"], errors="ignore")

    def calculate_distance(self, x1, y1, x2, y2):
        """Return the Euclidean distance between ``(x1, y1)`` and ``(x2, y2)``."""
        return math.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)

    def analyze_clicks_gaze(self, gaze_data, click_data, window_ms=1000, max_distance=100):
        """Count the clicks that have a gaze sample close in time and space.

        A click counts when at least one row of ``gaze_data`` has a
        ``normalizedTimestamp`` within ``window_ms`` of the click's (bounds
        inclusive) and lies at most ``max_distance`` away from the click's
        ``x``/``y`` position.

        Args:
            gaze_data: DataFrame with ``normalizedTimestamp``, ``x`` and ``y``
                columns.
            click_data: DataFrame with the same three columns.
            window_ms: Half-width of the time window around each click, in
                the unit of ``normalizedTimestamp`` (milliseconds).
            max_distance: Largest accepted gaze-to-click distance, in the
                unit of the coordinates (pixels).

        Returns:
            The number of rows of ``click_data`` that satisfy the condition.
        """
        # An empty collection loads as a DataFrame without columns, so bail
        # out before touching any column.
        if gaze_data.empty or click_data.empty:
            return 0

        gaze_time = gaze_data["normalizedTimestamp"]
        total_count = 0

        click_columns = zip(
            click_data["normalizedTimestamp"], click_data["x"], click_data["y"]
        )
        for click_time, click_x, click_y in click_columns:
            # Select the gaze samples inside the time window in one vectorised
            # step instead of scanning every gaze row in Python.
            in_window = (gaze_time >= click_time - window_ms) & (
                gaze_time <= click_time + window_ms
            )
            nearby = gaze_data.loc[in_window, ["x", "y"]]

            # any() stops at the first gaze sample that is close enough.
            if any(
                self.calculate_distance(gaze_x, gaze_y, click_x, click_y) <= max_distance
                for gaze_x, gaze_y in zip(nearby["x"], nearby["y"])
            ):
                total_count += 1

        return total_count

    @staticmethod
    def _rows_for_user(frame, roll_no):
        """Return the rows of ``frame`` whose ``rollNo`` equals ``roll_no``."""
        if "rollNo" not in frame.columns:
            # Nothing was recorded at all: return an empty frame rather than
            # failing on the missing column.
            return frame.iloc[0:0]
        return frame[frame["rollNo"] == roll_no]

    def Analyse(self):
        """Compute and print the per-user and overall click/gaze hit counts.

        Fills ``user_click_counts`` with one entry per ``rollNo`` in the meta
        data, taking ``total_clicks`` from the ``clickCount`` column and
        ``clicks_when_looking`` from ``analyze_clicks_gaze``.  The overall
        totals are stored in ``hit_click`` and ``total_click``.  When no
        clicks were recorded the hit ratio is printed as ``0.0``.
        """
        # Start from zero so that calling Analyse() again does not
        # double-count the totals.
        self.hit_click = 0
        self.total_click = 0

        # DataFrame.get tolerates an empty meta collection (no columns).
        for roll_no in self.meta_df.get("rollNo", ()):
            self.user_click_counts.setdefault(
                roll_no, {"total_clicks": 0, "clicks_when_looking": 0}
            )

        for roll_no, click_count in self.user_click_counts.items():
            selected_row = self.meta_df[self.meta_df['rollNo'] == roll_no]
            click_count["total_clicks"] = selected_row['clickCount'].values[0]

            gaze_roll = self._rows_for_user(self.gaze_df, roll_no)
            click_roll = self._rows_for_user(self.click_df, roll_no)

            click_count["clicks_when_looking"] = self.analyze_clicks_gaze(gaze_roll, click_roll)

        for roll_no, click_count in self.user_click_counts.items():
            self.hit_click += click_count['clicks_when_looking']
            self.total_click += click_count['total_clicks']

            print(f"User {roll_no}: {click_count['clicks_when_looking']} clicks out of {click_count['total_clicks']} were clicked when the user was looking at it.")

        # Guard against ZeroDivisionError when there are no clicks at all.
        hit_ratio = self.hit_click / self.total_click if self.total_click else 0.0
        print("Hit ratio", self.hit_click, self.total_click, hit_ratio)



def main():
    """Build a ClickThrough analyser and print its click/gaze report."""
    ClickThrough().Analyse()


# Run the analysis only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()


User CB.EN.U4CSE20402: 13 clicks out of 20 were clicked when the user was looking at it.
User CB.EN.U4CSE20403: 18 clicks out of 30 were clicked when the user was looking at it.
User CB.EN.U4CSE20421: 9 clicks out of 11 were clicked when the user was looking at it.
User CB.EN.U4CSE20425: 12 clicks out of 21 were clicked when the user was looking at it.
User CB.EN.U4CSE20429: 4 clicks out of 12 were clicked when the user was looking at it.
User CB.EN.U4CSE20431: 10 clicks out of 19 were clicked when the user was looking at it.
User CB.EN.U4CSE20435: 11 clicks out of 16 were clicked when the user was looking at it.
User CB.EN.U4CSE20436: 6 clicks out of 16 were clicked when the user was looking at it.
User CB.EN.U4CSE20447: 7 clicks out of 12 were clicked when the user was looking at it.
User CB.EN.U4CSE20449: 10 clicks out of 18 were clicked when the user was looking at it.
User CB.EN.U4CSE21002: 5 clicks out of 14 were clicked when the user was looking at it.
User CB.EN.U4CSE21014: 6 c