In [36]:
import numpy as np
import pandas as pd
import polars as pl
import sys
import re
import os
import matplotlib.pyplot as plt
import seaborn as sns
import plotly
import plotly.express as px


pd.set_option('display.max_columns',None)
import psycopg2


#to scale the data using z-score 
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

#Algorithms to use
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier

#Metrics to evaluate the model
from sklearn.metrics import confusion_matrix, classification_report, precision_recall_curve

import warnings
warnings.filterwarnings("ignore")

#importing PCA and TSNE
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE


# Load the literature variant table and give every comma-separated
# 'chrom-pos-ref-alt' entry of the hg38 column a row of its own.
x = (
    pd.read_excel(r'C:/Users/GenepoweRx_Madhu/Downloads/Reproductivehealth_Male_Lit_final_Positions_hg38_hg37.xlsx')
    .assign(chrom=lambda frame: frame['Chrom-pos-Ref-Alt_38'].str.split(','))
    .explode('chrom')
)

# The chromosome is the token in front of the first '-' of each entry.
x['CHROM'] = x['chrom'].str.split('-').str.get(0)

# Function to add 'chr' prefix conditionally
def add_chr_prefix(chrom):
    """Return ``chrom`` as a 'chr'-prefixed chromosome label.

    Parameters
    ----------
    chrom : str or scalar
        Chromosome token such as ``'1'``, ``' X'`` or ``12``.

    Returns
    -------
    The whitespace-stripped token with a leading ``'chr'``. Missing values
    (None/NaN) and blank strings are returned unchanged so the caller can
    drop them afterwards. A token that already starts with ``'chr'`` is not
    prefixed a second time.
    """
    # Missing values pass straight through.
    if pd.isnull(chrom):
        return chrom

    # str() lets numeric chromosome values through; strip() removes the
    # padding left behind when a comma-separated list such as
    # '1-100-A-T, 1-200-G-C' is split on ','.
    token = str(chrom).strip()
    if token == '':
        return chrom
    if token.startswith('chr'):
        return token
    return 'chr' + token

# Prefix the 'CHROM' column with 'chr', then remove every whitespace character.
# regex=True is passed explicitly: on pandas >= 2.0 str.replace treats the
# pattern as a literal string by default, so r'\s+' would match nothing.
x['CHROM'] = x['CHROM'].apply(add_chr_prefix)
x['CHROM'] = x['CHROM'].str.replace(r'\s+', '', regex=True)

# The position is the second '-'-separated field of each variant entry.
x['POS'] = x['chrom'].str.split('-').str[1].str.strip()

# Blank strings are not NaN, so dropna() alone would keep them and the
# integer cast below would fail; turn them into NaN first, then drop.
x['CHROM'] = x['CHROM'].where(x['CHROM'].ne(''))
x['POS'] = x['POS'].where(x['POS'].ne(''))
x = x.dropna(subset=['CHROM', 'POS'])

# Work on an explicit copy so the assignments below do not write into a
# slice of `x`.
df_3 = x[['Gene', 'CHROM', 'POS']].copy()
df_3['Literature'] = 'Yes'
df_3['POS'] = df_3['POS'].astype('int64')

# A position only identifies a variant together with its chromosome, so
# de-duplicate on the (CHROM, POS) pair rather than on POS alone.
df_3 = (
    df_3
    .drop_duplicates(subset=['CHROM', 'POS'])
    .reset_index(drop=True)
    [['Gene', 'CHROM', 'POS', 'Literature']]
)

# Capture-target intervals: tab-separated, no header row, four columns.
df = pd.read_csv(
    r'C:/Users/GenepoweRx_Madhu/Downloads/KAPA HyperExome_hg38_capture_targets (1).bed',
    sep='\t',
    header=None,
)
df.columns = ['chromosome', 'Start_pos', 'End_pos', 'INFO']

# Pad each target by 20 positions on both sides and pull the gene symbol
# out of the 'gene_symbol=...' entry of the INFO column.
# NOTE(review): BED intervals are conventionally 0-based/half-open while the
# literature positions look 1-based - confirm the inclusive comparison used
# downstream is intended.
df = df.assign(
    Extended_Start_pos=df['Start_pos'] - 20,
    Extended_End_pos=df['End_pos'] + 20,
    gene_symbol=df['INFO'].str.extract(r'gene_symbol=([^;]+)', expand=False),
)
df = df.loc[:, ['chromosome', 'Extended_Start_pos', 'Extended_End_pos', 'INFO', 'gene_symbol']]


# Step 1: Group the padded (start, end) intervals by chromosome name
chromosome_dict = {}
for chrom_name, interval_start, interval_end in zip(
    df['chromosome'], df['Extended_Start_pos'], df['Extended_End_pos']
):
    # setdefault creates the per-chromosome list on first sight of a name.
    chromosome_dict.setdefault(chrom_name, []).append((interval_start, interval_end))

# Step 2: Define a function to check coverage
def check_coverage(row):
    """Classify one row as 'Covered' or 'Not_Covered'.

    ``row`` must support ``row['POS']`` and ``row['CHROM']``. The position is
    tested against every (start, end) pair stored for that chromosome in the
    module-level ``chromosome_dict``; both interval ends are inclusive.
    A chromosome missing from the dictionary yields 'Not_Covered'.
    """
    position = row['POS']
    intervals = chromosome_dict.get(row['CHROM'], ())
    inside_any = any(low <= position <= high for low, high in intervals)
    return 'Covered' if inside_any else 'Not_Covered'

# Step 3: Label every literature position row by row, save the table to
# Excel, then display it.
df_3['Covered/Not_Covered'] = df_3.apply(check_coverage, axis='columns')
df_3.to_excel(
    excel_writer=r'C:/Users/GenepoweRx_Madhu/Downloads/Condition_lit_CHROM_POS_Covered_Not_covered/Reproductivehealth_Male_Lit_final_Positions.xlsx',
    index=False,
)
df_3

Unnamed: 0,Gene,CHROM,POS,Literature,Covered/Not_Covered
0,SYCP3,chr12,101728759,Yes,Covered
1,SYCP3,chr12,101729212,Yes,Covered
2,SYCP3,chr12,101737510,Yes,Not_Covered
3,SYCP3,chr12,101728656,Yes,Covered
4,SYCP3,chr12,101729109,Yes,Covered
...,...,...,...,...,...
447,WT1,chr11,32388694,Yes,Not_Covered
448,WT1,chr11,32388759,Yes,Not_Covered
449,WT1,chr11,32388568,Yes,Not_Covered
450,WT1,chr11,32392036,Yes,Covered


In [20]:
# The file carries a .csv extension but is read as tab-delimited.
data = pd.read_csv(
    r'C:/Users/GenepoweRx_Madhu/Downloads/Metabolic_myopathy_Lit_final_Positions_hg38_hg37 (1).csv',
    delimiter='\t',
)
data

Unnamed: 0,Gene,Change,Uploaded_variant_hg38,Uploaded_variant_hg37,Chrom-pos-Ref-Alt_38,rsID_hg38,Chrom-pos-Ref-Alt_37,rsID_hg37
0,PGM1,rs2749097,rs2749097,rs2749097,1-63661797-C-G,rs2749097,1-64127468-C-G,rs2749097
1,PGM1,c.405delT,PGM1:c.405delT,PGM1:c.405delT,"1-63629582-AT-A, 1-63629528-GT-G, 1-63651758-A...",,"1-64095253-AT-A, 1-64095199-GT-G, 1-64117429-A...",
2,PGM1,c.1547T>C,PGM1:c.1547T>C,PGM1:c.1547T>C,1-63654414-T-C,rs587777401,1-64120085-T-C,rs587777401
3,PGM1,c.988G>C,PGM1:c.988G>C,PGM1:c.988G>C,"1-63636348-G-C, 1-63636294-G-C",rs777164338,"1-64102019-G-C, 1-64101965-G-C",rs777164338
4,PGM1,c.1129G>A,PGM1:c.1129G>A,PGM1:c.1129G>A,1-63638785-G-A,,1-64104456-G-A,
...,...,...,...,...,...,...,...,...
343,ECHS1,c.538A>G,ECHS1:c.538A>G,ECHS1:c.538A>G,10-133366970-T-C,rs557128093,10-135180474-T-C,rs557128093
344,ECHS1,c.713C>T,ECHS1:c.713C>T,ECHS1:c.713C>T,10-133366002-G-A,rs200584793,10-135179506-G-A,rs200584793
345,ECHS1,c.476A>G,ECHS1:c.476A>G,ECHS1:c.476A>G,10-133368961-T-C,rs375032130,10-135182465-T-C,rs375032130
346,ECHS1,c.8C>A,ECHS1:c.8C>A,ECHS1:c.8C>A,10-133373326-G-T,rs372408822,10-135186830-G-T,rs372408822


In [21]:
# Write the loaded table to an Excel workbook without the index column.
data.to_excel(
    excel_writer=r'C:/Users/GenepoweRx_Madhu/Downloads/Metabolic_myopathy_Lit_final_Positions_hg38_hg37.xlsx',
    index=False,
)

In [31]:
# Number of positions per coverage label.
# NOTE(review): relies on df_3 from the first cell - re-run it before this one.
df_3.loc[:, 'Covered/Not_Covered'].value_counts()

Covered        108
Not_Covered     20
Name: Covered/Not_Covered, dtype: int64

In [9]:
# Positions that fall outside every padded capture interval, counted per gene.
y = df_3.loc[df_3['Covered/Not_Covered'].eq('Not_Covered')]
y['Gene'].value_counts()

EPM2A      38
KCNT1      30
SCN2A      13
SCN1B       8
SCN1A       7
EFHC1       5
RBFOX1      4
SPTAN1      2
GABRG2      2
GOSR2       2
CSTB        1
GABRA1      1
CACNA1H     1
Name: Gene, dtype: int64

In [10]:
# Positions that fall inside a padded capture interval, counted per gene.
z = df_3.loc[df_3['Covered/Not_Covered'].eq('Covered')]
z['Gene'].value_counts()

KCNT1      156
SCN2A      145
SCN1A       84
EPM2A       62
SPTAN1      28
EFHC1       28
SCN1B       26
CACNA1H     26
GABRG2      22
RBFOX1      15
GABRA1      12
KCTD7        7
GOSR2        6
TBC1D24      4
PCDH19       3
CSTB         3
KCNC1        1
Name: Gene, dtype: int64

In [56]:
# Load the answers export (comma-separated, header in the first row).
df_pall = pd.read_csv(
    filepath_or_buffer=r'C:/Users/GenepoweRx_Madhu/Downloads/Answers(2023-05-22).csv',
)
# Optional narrowing to the two columns used further down (left disabled):
#df_pall = df_pall[['tutor_id', 'student_rating']]
df_pall

Unnamed: 0,no,task_id,task_state,review_state,tutor_admin_id,student_rating,admin_rating,admin_comment,chat_room_url,retake_request_reason,retake_request_reason_detail,student_id,queried_image_object_key,task_result_id,last_message_sent_at,last_completion_request_sent_at,created_at,updated_at,queried_image_url,review_id,tutor_email,is_valid_retake,is_valid_report,started_at,closed_at,solved_at,subject_tagged_by_tutor,channel_leave_status,tutor_center_id,tutor_center_name,keycloak_email,solution_image_url,reporter_id,reporter_type,pick_duration,student_email,chatroom_report_reason,chat_report_reason_detail,auth_id,tutor_id,reviewer_email,reason,reposted_at,engine_status
0,1,6844329,CLOSED,TODO,Unassigned,1,,,sendbird_group_channel_170904867_8af8e7453b6b9...,RETAKE_REQUEST_REASON_UNSPECIFIED,,c5f8d8a1-fab3-4b35-a81f-382620061bc6,app-server-student/2023/5/4/69837fb6-0838-4312...,6050107,2023-05-04 13:15:13,,2023-05-04 12:10:59,1970-01-01 00:00:00,https://d3hlidq5bf9vg0.cloudfront.net/app-serv...,-1,leena.dawar@triviumservice.com,,,2023-05-04 13:13:55,2023-05-04 13:16:30,2023-05-04 13:15:13,CALCULUS,NOT_LEAVE,4,backoffice.center.trivium,,https://d3hlidq5bf9vg0.cloudfront.net/app-serv...,,REPORTER_UNSPECIFIED,01:02:55,anagp92003@gmail.com,UNSPECIFIED,,3e210a9c-f2a1-48c0-98ff-8d0cfb2ba2c9,4999,,WRONG_ANSWER,,SEARCH
1,2,6845537,CLOSED,TODO,Unassigned,3,,,sendbird_group_channel_178747312_1f67a3457fd11...,RETAKE_REQUEST_REASON_UNSPECIFIED,,8e94a0b1-9a79-4217-84fb-29e8c4dfe96e,app-server-student/2023/5/5/e99666ea-0ff4-4d3d...,6051197,2023-05-05 02:20:54,,2023-05-05 01:51:54,1970-01-01 00:00:00,https://d3hlidq5bf9vg0.cloudfront.net/app-serv...,-1,reema.agarwal@triviumservice.com,,,2023-05-05 02:00:10,2023-05-05 02:25:39,2023-05-05 02:20:54,ALGEBRA,NOT_LEAVE,4,backoffice.center.trivium,,https://d3hlidq5bf9vg0.cloudfront.net/app-serv...,,REPORTER_UNSPECIFIED,00:08:16,unicornzslayyy101@gmail.com,UNSPECIFIED,,5ac49aa2-2721-4446-8a6e-eddc516fac62,6439,,WRONG_ANSWER,,SEARCH
2,3,6845783,CLOSED,TODO,Unassigned,1,,,sendbird_group_channel_229694720_9a5feba80b62c...,RETAKE_REQUEST_REASON_UNSPECIFIED,,11e3872e-6ee5-4bfc-ba68-4ca0b57bb207,app-server-student/2023/5/5/11f1d724-86bc-4ef3...,6051466,2023-05-05 06:26:12,,2023-05-05 05:43:02,1970-01-01 00:00:00,https://d3hlidq5bf9vg0.cloudfront.net/app-serv...,-1,daljeet.kaur1@triviumservice.com,,,2023-05-05 06:24:47,2023-05-05 21:01:39,2023-05-05 06:26:12,CALCULUS,NOT_LEAVE,4,backoffice.center.trivium,,https://d3hlidq5bf9vg0.cloudfront.net/app-serv...,,REPORTER_UNSPECIFIED,00:41:45,xpxxzn2w6j@privaterelay.appleid.com,UNSPECIFIED,,16e7b590-58ca-4f1d-a0c1-99db8adb3e6d,3986,,WRONG_ANSWER,,SEARCH
3,4,6846091,CLOSED,TODO,Unassigned,2,,,sendbird_group_channel_212267434_23c1c3db36044...,RETAKE_REQUEST_REASON_UNSPECIFIED,,35133269-3b13-49ec-892e-08071ba47fe1,app-server-student/2023/5/5/1fa0a6ce-f3c5-4533...,6051776,2023-05-05 17:23:25,,2023-05-05 16:35:45,1970-01-01 00:00:00,https://d3hlidq5bf9vg0.cloudfront.net/app-serv...,-1,sudip.chakraborty@triviumservice.com,,,2023-05-05 17:15:08,2023-05-05 21:12:55,2023-05-05 17:23:24,ALGEBRA,NOT_LEAVE,4,backoffice.center.trivium,,https://d3hlidq5bf9vg0.cloudfront.net/app-serv...,,REPORTER_UNSPECIFIED,00:39:22,anulpi26@gmail.com,UNSPECIFIED,,2defedfb-6db2-410d-be4f-3bbe3a06836b,6290,,WRONG_ANSWER,,SEARCH
4,5,6846238,CLOSED,TODO,Unassigned,1,,,sendbird_group_channel_170904867_6d26e51a4513b...,RETAKE_REQUEST_REASON_UNSPECIFIED,,f110d474-7323-4df1-ae74-402292af20bf,app-server-student/2023/5/5/6f0e89be-059a-453b...,6051963,2023-05-05 19:45:31,,2023-05-05 18:53:30,1970-01-01 00:00:00,https://d3hlidq5bf9vg0.cloudfront.net/app-serv...,-1,leena.dawar@triviumservice.com,,,2023-05-05 19:24:16,2023-05-05 19:52:09,2023-05-05 19:45:30,STATISTICS_AND_PROBABILITY,NOT_LEAVE,4,backoffice.center.trivium,,https://d3hlidq5bf9vg0.cloudfront.net/app-serv...,,REPORTER_UNSPECIFIED,00:30:45,8cnp7b456t@privaterelay.appleid.com,UNSPECIFIED,,3e210a9c-f2a1-48c0-98ff-8d0cfb2ba2c9,4999,,WRONG_ANSWER,,SEARCH
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
158,159,6861731,CLOSED,TODO,Unassigned,1,,,sendbird_group_channel_213607036_7baa0af464d2d...,RETAKE_REQUEST_REASON_UNSPECIFIED,,e4873ed3-2bce-450f-aaad-7b813ec0c839,app-server-student/2023/5/20/2bf62e3b-b37e-4a7...,6066521,2023-05-20 05:29:11,,2023-05-20 05:04:47,1970-01-01 00:00:00,https://d3hlidq5bf9vg0.cloudfront.net/app-serv...,-1,swati.upadhyay@triviumservice.com,,,2023-05-20 05:04:52,2023-05-20 05:41:32,2023-05-20 05:29:11,ALGEBRA,NOT_LEAVE,4,backoffice.center.trivium,,https://d3hlidq5bf9vg0.cloudfront.net/app-serv...,,REPORTER_UNSPECIFIED,00:00:05,kdrc7bmfmn@privaterelay.appleid.com,UNSPECIFIED,,ced21275-8aa6-4259-b2f6-2748c925db71,7801,,WRONG_ANSWER,,SEARCH
159,160,6861883,CLOSED,TODO,Unassigned,1,,,sendbird_group_channel_102230683_82bf4cc0f348e...,RETAKE_REQUEST_REASON_UNSPECIFIED,,ba23520a-c896-4538-b310-bdf7c7e2afed,app-server-student/2023/5/20/49e3c55c-4b44-4ef...,6066660,2023-05-20 23:10:21,,2023-05-20 22:55:30,1970-01-01 00:00:00,https://d3hlidq5bf9vg0.cloudfront.net/app-serv...,-1,swati.upadhyay@triviumservice.com,,,2023-05-20 23:01:05,2023-05-20 23:12:20,2023-05-20 23:05:42,ALGEBRA,NOT_LEAVE,4,backoffice.center.trivium,,https://d3hlidq5bf9vg0.cloudfront.net/app-serv...,,REPORTER_UNSPECIFIED,00:05:35,drkaminker@gmail.com,UNSPECIFIED,,ced21275-8aa6-4259-b2f6-2748c925db71,7801,,WRONG_ANSWER,,SEARCH
160,161,6861888,CLOSED,TODO,Unassigned,2,,,sendbird_group_channel_168764795_4ab16331cf7f0...,RETAKE_REQUEST_REASON_UNSPECIFIED,,9d7c8eec-e930-4d18-8eba-a29398cc43ae,app-server-student/2023/5/20/6aba52b3-0057-4ea...,6066664,2023-05-20 23:17:18,,2023-05-20 23:13:40,1970-01-01 00:00:00,https://d3hlidq5bf9vg0.cloudfront.net/app-serv...,-1,salma.rani@triviumservice.com,,,2023-05-20 23:13:51,2023-05-20 23:26:51,2023-05-20 23:17:18,CALCULUS,NOT_LEAVE,4,backoffice.center.trivium,,https://d3hlidq5bf9vg0.cloudfront.net/app-serv...,,REPORTER_UNSPECIFIED,00:00:10,9ph56rmmnj@privaterelay.appleid.com,UNSPECIFIED,,31f302ad-374d-4386-8f54-bd3d69a951f6,4928,,WRONG_ANSWER,,SEARCH
161,162,6861894,CLOSED,TODO,Unassigned,1,,,sendbird_group_channel_168764795_6d78296f9638e...,RETAKE_REQUEST_REASON_UNSPECIFIED,,9d7c8eec-e930-4d18-8eba-a29398cc43ae,app-server-student/2023/5/20/319c94c3-a911-4b3...,6066673,2023-05-21 00:00:10,,2023-05-20 23:44:38,1970-01-01 00:00:00,https://d3hlidq5bf9vg0.cloudfront.net/app-serv...,-1,salma.rani@triviumservice.com,,,2023-05-20 23:44:42,2023-05-21 00:12:48,2023-05-21 00:00:10,GEOMETRY,NOT_LEAVE,4,backoffice.center.trivium,,https://d3hlidq5bf9vg0.cloudfront.net/app-serv...,,REPORTER_UNSPECIFIED,00:00:04,9ph56rmmnj@privaterelay.appleid.com,UNSPECIFIED,,31f302ad-374d-4386-8f54-bd3d69a951f6,4928,,WRONG_ANSWER,,SEARCH


In [59]:
# Order the rows by tutor id (ascending) and display the result.
df_pall = df_pall.sort_values(by='tutor_id', ascending=True)
df_pall

Unnamed: 0,no,task_id,task_state,review_state,tutor_admin_id,student_rating,admin_rating,admin_comment,chat_room_url,retake_request_reason,retake_request_reason_detail,student_id,queried_image_object_key,task_result_id,last_message_sent_at,last_completion_request_sent_at,created_at,updated_at,queried_image_url,review_id,tutor_email,is_valid_retake,is_valid_report,started_at,closed_at,solved_at,subject_tagged_by_tutor,channel_leave_status,tutor_center_id,tutor_center_name,keycloak_email,solution_image_url,reporter_id,reporter_type,pick_duration,student_email,chatroom_report_reason,chat_report_reason_detail,auth_id,tutor_id,reviewer_email,reason,reposted_at,engine_status
143,144,6859644,CLOSED,TODO,Unassigned,1,,,sendbird_group_channel_102230683_c64c394b4a629...,RETAKE_REQUEST_REASON_UNSPECIFIED,,ef4f661c-e7cd-4ee2-bb9a-27171cc54099,app-server-student/2023/5/18/71ac9d5b-466f-48b...,6064550,2023-05-18 01:25:25,,2023-05-18 01:14:45,1970-01-01 00:00:00,https://d3hlidq5bf9vg0.cloudfront.net/app-serv...,-1,rajesh.maurya@triviumservice.com,,,2023-05-18 01:16:15,2023-05-18 01:29:49,2023-05-18 01:25:25,ALGEBRA,NOT_LEAVE,4,backoffice.center.trivium,,https://d3hlidq5bf9vg0.cloudfront.net/app-serv...,,REPORTER_UNSPECIFIED,00:01:30,samsro240@gmail.com,UNSPECIFIED,,5b01b061-9316-49ea-a5bb-06382107aa06,3979,,POOR_HANDWRITING,,SEARCH
34,35,6848620,CLOSED,TODO,Unassigned,2,,,sendbird_group_channel_102230683_352d7a2923d0a...,RETAKE_REQUEST_REASON_UNSPECIFIED,,904241c3-2c88-44bc-ac40-45f00b85a7b4,app-server-student/2023/5/8/e34665b5-fb32-486c...,6054236,2023-05-08 23:31:56,,2023-05-08 22:34:14,1970-01-01 00:00:00,https://d3hlidq5bf9vg0.cloudfront.net/app-serv...,-1,rajesh.maurya@triviumservice.com,,,2023-05-08 23:28:05,2023-05-09 01:42:43,2023-05-08 23:31:56,STATISTICS_AND_PROBABILITY,NOT_LEAVE,4,backoffice.center.trivium,,https://d3hlidq5bf9vg0.cloudfront.net/app-serv...,,REPORTER_UNSPECIFIED,00:53:50,hpdt52px2g@privaterelay.appleid.com,UNSPECIFIED,,5b01b061-9316-49ea-a5bb-06382107aa06,3979,,WRONG_ANSWER,,SEARCH
155,156,6861555,CLOSED,TODO,Unassigned,1,,,sendbird_group_channel_102230683_3e1a35a71d460...,RETAKE_REQUEST_REASON_UNSPECIFIED,,9d7c8eec-e930-4d18-8eba-a29398cc43ae,app-server-student/2023/5/19/726dbe35-4d3a-494...,6066341,2023-05-19 23:08:49,,2023-05-19 22:51:49,1970-01-01 00:00:00,https://d3hlidq5bf9vg0.cloudfront.net/app-serv...,-1,rajesh.maurya@triviumservice.com,,,2023-05-19 22:58:42,2023-05-19 23:16:46,2023-05-19 23:08:49,ALGEBRA,NOT_LEAVE,4,backoffice.center.trivium,,https://d3hlidq5bf9vg0.cloudfront.net/app-serv...,,REPORTER_UNSPECIFIED,00:06:53,9ph56rmmnj@privaterelay.appleid.com,UNSPECIFIED,,5b01b061-9316-49ea-a5bb-06382107aa06,3979,,WRONG_ANSWER,,SEARCH
153,154,6861530,CLOSED,TODO,Unassigned,1,,,sendbird_group_channel_102230683_7da13a8264e67...,RETAKE_REQUEST_REASON_UNSPECIFIED,,e4873ed3-2bce-450f-aaad-7b813ec0c839,app-server-student/2023/5/19/5d4cceee-7887-4da...,6066315,2023-05-19 22:37:44,,2023-05-19 22:15:10,1970-01-01 00:00:00,https://d3hlidq5bf9vg0.cloudfront.net/app-serv...,-1,rajesh.maurya@triviumservice.com,,,2023-05-19 22:33:18,2023-05-19 22:39:47,2023-05-19 22:37:44,STATISTICS_AND_PROBABILITY,NOT_LEAVE,4,backoffice.center.trivium,,https://d3hlidq5bf9vg0.cloudfront.net/app-serv...,,REPORTER_UNSPECIFIED,00:18:08,kdrc7bmfmn@privaterelay.appleid.com,UNSPECIFIED,,5b01b061-9316-49ea-a5bb-06382107aa06,3979,,WRONG_ANSWER,,SEARCH
107,108,6856028,CLOSED,TODO,Unassigned,1,,,sendbird_group_channel_102230683_81f661ca32b8b...,RETAKE_REQUEST_REASON_UNSPECIFIED,,9c45836e-58dc-4d00-9886-4b22e396f15e,app-server-student/2023/5/15/6a0f4de8-594d-492...,6061149,2023-05-15 05:18:12,,2023-05-15 04:37:22,1970-01-01 00:00:00,https://d3hlidq5bf9vg0.cloudfront.net/app-serv...,-1,rajesh.maurya@triviumservice.com,,,2023-05-15 04:37:56,2023-05-15 05:32:53,2023-05-15 04:51:15,ALGEBRA,NOT_LEAVE,4,backoffice.center.trivium,,https://d3hlidq5bf9vg0.cloudfront.net/app-serv...,,REPORTER_UNSPECIFIED,00:00:34,najirjones2@gmail.com,UNSPECIFIED,,5b01b061-9316-49ea-a5bb-06382107aa06,3979,,WRONG_ANSWER,,SEARCH
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70,71,6852884,CLOSED,TODO,Unassigned,1,,,sendbird_group_channel_172018090_e7fdf74ac541d...,RETAKE_REQUEST_REASON_UNSPECIFIED,,253be3fa-0148-4745-979d-fe9957fb1d44,app-server-student/2023/5/11/022c2e5f-8b3b-4d7...,6058170,2023-05-11 19:04:11,,2023-05-11 19:02:19,1970-01-01 00:00:00,https://d3hlidq5bf9vg0.cloudfront.net/app-serv...,-1,jayesh.savaliya@triviumservice.com,,,2023-05-11 19:02:23,2023-05-11 19:07:16,2023-05-11 19:04:11,STATISTICS_AND_PROBABILITY,NOT_LEAVE,4,backoffice.center.trivium,,https://d3hlidq5bf9vg0.cloudfront.net/app-serv...,,REPORTER_UNSPECIFIED,00:00:04,allisahslanier@yahoo.com,UNSPECIFIED,,e6ac33e7-3a7c-4c9b-9ad5-d6958e147d3e,7924,,WRONG_ANSWER,,SEARCH
51,52,6850963,CLOSED,TODO,Unassigned,1,,,sendbird_group_channel_212855081_9464036b1a34f...,RETAKE_REQUEST_REASON_UNSPECIFIED,,35a94b7e-ac65-4215-a979-6b7ad479bd71,app-server-student/2023/5/10/892828ab-e2c5-412...,6056384,2023-05-10 16:07:49,,2023-05-10 15:30:55,1970-01-01 00:00:00,https://d3hlidq5bf9vg0.cloudfront.net/app-serv...,-1,jayesh.savaliya@triviumservice.com,,,2023-05-10 16:02:00,2023-05-10 16:33:57,2023-05-10 16:07:37,GEOMETRY,NOT_LEAVE,4,backoffice.center.trivium,,https://d3hlidq5bf9vg0.cloudfront.net/app-serv...,,REPORTER_UNSPECIFIED,00:31:04,mariahs1303@icloud.com,UNSPECIFIED,,e6ac33e7-3a7c-4c9b-9ad5-d6958e147d3e,7924,,NO_EXPLANATION_WORK_SHOWN,,SEARCH
63,64,6851881,CLOSED,TODO,Unassigned,2,,,sendbird_group_channel_217344995_f7b1a7b48abfc...,RETAKE_REQUEST_REASON_UNSPECIFIED,,81f71d4f-0534-4bf0-a2e4-7ed940c4f75a,app-server-student/2023/5/11/8d5fca1f-b98f-4bd...,6057283,2023-05-11 02:36:06,,2023-05-11 01:33:15,1970-01-01 00:00:00,https://d3hlidq5bf9vg0.cloudfront.net/app-serv...,-1,meenakshi.narula1@triviumservice.com,,,2023-05-11 02:31:32,2023-05-11 03:13:02,2023-05-11 02:36:06,GEOMETRY,NOT_LEAVE,4,backoffice.center.trivium,,https://d3hlidq5bf9vg0.cloudfront.net/app-serv...,,REPORTER_UNSPECIFIED,00:58:16,s543235@student.roundrockisd.org,UNSPECIFIED,,fa19ea3d-9a1a-4059-b36d-9d79e2c8e8ae,7970,,WRONG_ANSWER,,SEARCH
129,130,6857843,CLOSED,TODO,Unassigned,1,,,sendbird_group_channel_219682263_241a250a5ae49...,RETAKE_REQUEST_REASON_UNSPECIFIED,,fedb54ad-049c-444a-8d27-3579e287d406,app-server-student/2023/5/16/6569d0b1-4219-467...,6062876,2023-05-16 17:56:15,,2023-05-16 17:48:00,1970-01-01 00:00:00,https://d3hlidq5bf9vg0.cloudfront.net/app-serv...,-1,aniket.ghara@triviumservice.com,,,2023-05-16 17:53:25,2023-05-16 17:58:31,2023-05-16 17:56:06,STATISTICS_AND_PROBABILITY,NOT_LEAVE,4,backoffice.center.trivium,,https://d3hlidq5bf9vg0.cloudfront.net/app-serv...,,REPORTER_UNSPECIFIED,00:05:25,armandorojodavid@icloud.com,UNSPECIFIED,,7a606a7a-311e-4e31-8176-21d1bdad257c,8210,,WRONG_ANSWER,,SEARCH


In [61]:
def _distinct_ratings(ratings):
    """Join the distinct values of ``ratings`` (first-seen order) with commas."""
    return ','.join(map(str, ratings.unique()))


# One row per tutor holding the comma-joined distinct student ratings.
grp1 = (
    df_pall
    .groupby(['tutor_id'])
    .agg({'student_rating': _distinct_ratings})
    .reset_index()
)
grp1

Unnamed: 0,tutor_id,student_rating
0,3979,12
1,3986,132
2,3996,12
3,4927,1
4,4928,132
5,4966,123
6,4980,13
7,4983,1
8,4986,31
9,4999,31


In [55]:
# Split the comma-separated values into a list of integers
grp1['value_list'] = grp1['student_rating'].apply(lambda x: [int(val) for val in x.split(',')])

# Calculate the maximum for each row and create a new column
grp1['max_value'] = grp1['value_list'].apply(max)
grp1 = grp1[['tutor_id', 'student_rating', 'max_value']]
grp1

Unnamed: 0,tutor_id,student_rating,max_value
0,3979,21,2
1,3986,123,3
2,3996,12,2
3,4927,1,1
4,4928,132,3
5,4966,123,3
6,4980,13,3
7,4983,1,1
8,4986,31,3
9,4999,13,3
