In [None]:
# prompt: load my google drive
# pandas drives every cell below; numpy and datetime are imported here but are
# not referenced in the cells visible in this notebook.
import pandas as pd
import numpy as np
from datetime import datetime
# Show every column when a DataFrame is displayed (the claims extract is wide).
pd.set_option('display.max_columns', None)

# Mount Google Drive at /content/drive so the data paths used below resolve.
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:

# --- Input locations on Google Drive ----------------------------------------
professional_file_path = '/content/drive/MyDrive/Data/All FFS Claims/carrier.csv'
# "LU" = look-up file for health codes
LU_drg_file_path = '/content/drive/MyDrive/Data/HealthCodes/DRG.csv'

# The 2025 ICD code download did not contain about 90 of the codes needed here;
# the remaining codes were looked up with Perplexity.AI and saved in a second file.
# Ideal way: download the 2015-2025 ICD codes, create a database, then do the look-up.
LU_ICD_file_path = '/content/drive/MyDrive/Data/HealthCodes/ICD10Diagnosis.csv'
LU_ICD2_file_path = '/content/drive/MyDrive/Data/HealthCodes/ICD_DIAG_CD_RemainingCodes.csv'

# --- DRG look-up --------------------------------------------------------------
# rename() returns a new frame, so nothing is modified in place on a column selection.
LU_drg = (
    pd.read_csv(LU_drg_file_path)[['DRG', 'Description']]
    .rename(columns={'Description': 'DRG_Description'})
)
# Store DRG as a zero-padded 3-character string; missing values become '000'.
LU_drg['DRG'] = LU_drg['DRG'].fillna(0).astype(int).astype(str).str.zfill(3)

# --- ICD diagnosis look-up ----------------------------------------------------
LU_ICD = pd.read_csv(LU_ICD_file_path).rename(columns={'Description': 'ICD_Description'})
LU_ICD2 = (
    pd.read_csv(LU_ICD2_file_path)[['ICD_DIAG_CD', 'Description']]
    .rename(columns={'Description': 'ICD_Description'})
)

# Stack both sources and keep exactly one row per code. A code that appears more
# than once would multiply rows in every left merge against this look-up.
# keep='first' prefers the main ICD file over the manually researched one.
# NOTE(review): assumes both files key on an 'ICD_DIAG_CD' column, as the merges
# later in the notebook require - confirm against the CSV headers.
LU_ICD_final = (
    pd.concat([LU_ICD, LU_ICD2], ignore_index=True)
    .drop_duplicates(subset='ICD_DIAG_CD', keep='first')
    .reset_index(drop=True)
)



In [None]:
# Read the pipe-delimited carrier (professional) claims file.
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, which prevents mixed-type columns and the
# DtypeWarning that chunked inference emits.
professional = pd.read_csv(professional_file_path, sep="|", low_memory=False)
# How were columns read in? One row per column, showing the inferred dtype.
col_professional = pd.DataFrame(professional.dtypes, columns=['type'])
col_professional



  professional = pd.read_csv(professional_file_path, sep="|")


Unnamed: 0,type
BENE_ID,int64
CLM_ID,int64
NCH_NEAR_LINE_REC_IDENT_CD,object
NCH_CLM_TYPE_CD,int64
CLM_FROM_DT,object
...,...
LINE_HCT_HGB_RSLT_NUM,float64
LINE_HCT_HGB_TYPE_CD,object
LINE_NDC_CD,float64
CARR_LINE_CLIA_LAB_NUM,object


Linking the diagnosis on the claim header to the treatment on the claim detail:  LINE_ICD_DGNS_CD
This is CMS's implementation of the diagnosis-pointer concept, which allows multiple ICD diagnosis codes to be referenced on each claim detail (treatment) line.

In [None]:
# Show the inferred dtype of every column whose name contains 'NPI'.
is_npi_column = col_professional.index.str.contains('NPI')
col_professional[is_npi_column]

Unnamed: 0,type
RFR_PHYSN_NPI,int64
CARR_CLM_BLG_NPI_NUM,int64
PRF_PHYSN_NPI,int64
ORG_NPI_NUM,int64


Note the NPI fields above.
BLG = Billing
RFR = Referring
ORG = Organization.  Usually a physician's group.
PRF = Performing/Rendering

In [None]:
# Number of claim lines per place-of-service code, most frequent first.
place_of_service_codes = professional['LINE_PLACE_OF_SRVC_CD']
place_of_service_codes.value_counts()

Unnamed: 0_level_0,count
LINE_PLACE_OF_SRVC_CD,Unnamed: 1_level_1
11,926479
20,173831
22,11448
12,3850
31,3400
34,1955
2,41


Place of Service is a key concept in professional claims.  
11 = Office
12 = Home
20 = Urgent Care Facility
21 = Inpatient
22 = Outpatient Hospital
23 = ER
31 = Skilled Nursing Facility
34 = Hospice

We need to be careful not to double count encounters.  An inpatient encounter will have at least two claims: a facility claim and a professional claim.  So let us make a simplifying assumption that we have already calculated encounters from claims and that we only want to count "office visits" from professional claims.  This is a very simplistic assumption!  Please do not make it on real claims data without carefully examining the pros and cons.

In [None]:
# Keep only claim lines whose place of service is 11 (office).
OFFICE_PLACE_OF_SRVC_CD = 11

print(len(professional))
# .copy() yields an independent frame, so the column assignments made in later
# cells do not write into a slice of the original (SettingWithCopyWarning).
professional = professional[
    professional['LINE_PLACE_OF_SRVC_CD'] == OFFICE_PLACE_OF_SRVC_CD
].copy()
print(len(professional))

1121004
926479


In [None]:
# Convert columns to appropriate data types and derive the date-based columns.
# .assign() builds a new DataFrame instead of setting columns on a frame that was
# produced by boolean filtering, which avoids SettingWithCopyWarning. The
# callables are evaluated in order, so CLAIM_DAYS and YR see the parsed dates.
professional = professional.assign(
    # Identifiers are labels, not quantities: keep them as strings.
    BENE_ID=lambda df: df['BENE_ID'].astype(str),
    CLM_ID=lambda df: df['CLM_ID'].astype(str),
    # Claim dates are parsed with the day-month-year pattern '%d-%b-%Y'.
    CLM_FROM_DT=lambda df: pd.to_datetime(df['CLM_FROM_DT'], format='%d-%b-%Y'),
    CLM_THRU_DT=lambda df: pd.to_datetime(df['CLM_THRU_DT'], format='%d-%b-%Y'),
    # Inclusive length of the claim in days (same-day claim -> 1).
    CLAIM_DAYS=lambda df: (df['CLM_THRU_DT'] - df['CLM_FROM_DT']).dt.days + 1,
    # Calendar year of the claim's through date.
    YR=lambda df: df['CLM_THRU_DT'].dt.year,
)

In [None]:
# Distribution of claim length in days.
claim_days = professional['CLAIM_DAYS']
claim_days.value_counts()

Unnamed: 0_level_0,count
CLAIM_DAYS,Unnamed: 1_level_1
1,901926
2,24553


In [None]:
# prompt: filter professional for CLAIM_DAYS == 1 and drop CLAIM_DAYS column
print(len(professional))
# Keep single-day claims only, then discard the helper column.
is_single_day = professional['CLAIM_DAYS'].eq(1)
professional = professional.loc[is_single_day].drop(columns='CLAIM_DAYS')
print(len(professional))

926479
901926


In [None]:
# prompt: for each CLM_ID, show me the max of LINE_NUM.  Filter professional for 5 CLM_IDS with the highest LINE_NUM

# Highest LINE_NUM seen on each claim
max_line_num_by_clm_id = professional.groupby('CLM_ID')['LINE_NUM'].agg('max')

# The five claims whose highest LINE_NUM is largest
top_5_clm_ids = max_line_num_by_clm_id.sort_values(ascending=False).index[:5]

# All lines belonging to those five claims; preview the first 30
in_top_5 = professional['CLM_ID'].isin(top_5_clm_ids)
filtered_professional = professional.loc[in_top_5]
filtered_professional.head(30)

Unnamed: 0,BENE_ID,CLM_ID,NCH_NEAR_LINE_REC_IDENT_CD,NCH_CLM_TYPE_CD,CLM_FROM_DT,CLM_THRU_DT,NCH_WKLY_PROC_DT,CARR_CLM_ENTRY_CD,CLM_DISP_CD,CARR_NUM,CARR_CLM_PMT_DNL_CD,CLM_PMT_AMT,CARR_CLM_PRMRY_PYR_PD_AMT,RFR_PHYSN_UPIN,RFR_PHYSN_NPI,CARR_CLM_PRVDR_ASGNMT_IND_SW,NCH_CLM_PRVDR_PMT_AMT,NCH_CLM_BENE_PMT_AMT,NCH_CARR_CLM_SBMTD_CHRG_AMT,NCH_CARR_CLM_ALOWD_AMT,CARR_CLM_CASH_DDCTBL_APLD_AMT,CARR_CLM_HCPCS_YR_CD,CARR_CLM_RFRNG_PIN_NUM,PRNCPAL_DGNS_CD,PRNCPAL_DGNS_VRSN_CD,ICD_DGNS_CD1,ICD_DGNS_VRSN_CD1,ICD_DGNS_CD2,ICD_DGNS_VRSN_CD2,ICD_DGNS_CD3,ICD_DGNS_VRSN_CD3,ICD_DGNS_CD4,ICD_DGNS_VRSN_CD4,ICD_DGNS_CD5,ICD_DGNS_VRSN_CD5,ICD_DGNS_CD6,ICD_DGNS_VRSN_CD6,ICD_DGNS_CD7,ICD_DGNS_VRSN_CD7,ICD_DGNS_CD8,ICD_DGNS_VRSN_CD8,ICD_DGNS_CD9,ICD_DGNS_VRSN_CD9,ICD_DGNS_CD10,ICD_DGNS_VRSN_CD10,ICD_DGNS_CD11,ICD_DGNS_VRSN_CD11,ICD_DGNS_CD12,ICD_DGNS_VRSN_CD12,CLM_CLNCL_TRIL_NUM,CARR_CLM_BLG_NPI_NUM,LINE_NUM,CARR_PRFRNG_PIN_NUM,PRF_PHYSN_UPIN,PRF_PHYSN_NPI,ORG_NPI_NUM,CARR_LINE_PRVDR_TYPE_CD,TAX_NUM,PRVDR_STATE_CD,PRVDR_ZIP,PRVDR_SPCLTY,PRTCPTNG_IND_CD,CARR_LINE_RDCD_PMT_PHYS_ASTN_C,LINE_SRVC_CNT,LINE_CMS_TYPE_SRVC_CD,LINE_PLACE_OF_SRVC_CD,CARR_LINE_PRCNG_LCLTY_CD,LINE_1ST_EXPNS_DT,LINE_LAST_EXPNS_DT,HCPCS_CD,HCPCS_1ST_MDFR_CD,HCPCS_2ND_MDFR_CD,BETOS_CD,LINE_NCH_PMT_AMT,LINE_BENE_PMT_AMT,LINE_PRVDR_PMT_AMT,LINE_BENE_PTB_DDCTBL_AMT,LINE_BENE_PRMRY_PYR_CD,LINE_BENE_PRMRY_PYR_PD_AMT,LINE_COINSRNC_AMT,LINE_SBMTD_CHRG_AMT,LINE_ALOWD_CHRG_AMT,LINE_PRCSG_IND_CD,LINE_PMT_80_100_CD,LINE_SERVICE_DEDUCTIBLE,CARR_LINE_MTUS_CNT,CARR_LINE_MTUS_CD,LINE_ICD_DGNS_CD,LINE_ICD_DGNS_VRSN_CD,HPSA_SCRCTY_IND_CD,CARR_LINE_RX_NUM,LINE_HCT_HGB_RSLT_NUM,LINE_HCT_HGB_TYPE_CD,LINE_NDC_CD,CARR_LINE_CLIA_LAB_NUM,CARR_LINE_ANSTHSA_UNIT_CNT,YR
253455,-10000010261699,-10000930350869,O,71,2020-02-10,2020-02-10,14-Feb-2020,1,1,952,1,1711.39,0.0,,9999958595,A,1711.39,0,2139.23,1711.39,0.0,1,,T7432X,0,Z733,0,N182,0.0,R801,0.0,T50905,0.0,E1121,0.0,Z604,0.0,M5450,0.0,T7431X,0.0,Z653,0.0,E8881,0.0,P2832,0.0,G479,0.0,,1104905702,1,,,9999958595,1104905702,0,999877063,IL,601601605,1,4,0,28,1,11,27,10-Feb-2020,10-Feb-2020,,,,,105.2,0,105.2,0.0,,0,105.2,131.5,131.5,A,,,46,,T7432X,0,,,0.0,R1,,,1,2020
253456,-10000010261699,-10000930350869,O,71,2020-02-10,2020-02-10,14-Feb-2020,1,1,952,1,1711.39,0.0,,9999958595,A,1711.39,0,2139.23,1711.39,0.0,1,,T7432X,0,Z733,0,N182,0.0,R801,0.0,T50905,0.0,E1121,0.0,Z604,0.0,M5450,0.0,T7431X,0.0,Z653,0.0,E8881,0.0,P2832,0.0,G479,0.0,,1104905702,2,,,9999958595,1104905702,0,999877063,IL,601601605,1,4,0,28,1,11,27,10-Feb-2020,10-Feb-2020,,,,,108.8,0,108.8,0.0,,0,108.8,136.0,136.0,A,,,46,,,0,,,0.0,R1,,,1,2020
253457,-10000010261699,-10000930350869,O,71,2020-02-10,2020-02-10,14-Feb-2020,1,1,952,1,1711.39,0.0,,9999958595,A,1711.39,0,2139.23,1711.39,0.0,1,,T7432X,0,Z733,0,N182,0.0,R801,0.0,T50905,0.0,E1121,0.0,Z604,0.0,M5450,0.0,T7431X,0.0,Z653,0.0,E8881,0.0,P2832,0.0,G479,0.0,,1104905702,3,,,9999958595,1104905702,0,999877063,IL,601601605,1,4,0,28,1,11,27,10-Feb-2020,10-Feb-2020,99495,,,,455.99,0,455.99,0.0,,0,455.99,569.99,569.99,A,,,46,,,0,,,0.0,R1,,,1,2020
253458,-10000010261699,-10000930350869,O,71,2020-02-10,2020-02-10,14-Feb-2020,1,1,952,1,1711.39,0.0,,9999958595,A,1711.39,0,2139.23,1711.39,0.0,1,,T7432X,0,Z733,0,N182,0.0,R801,0.0,T50905,0.0,E1121,0.0,Z604,0.0,M5450,0.0,T7431X,0.0,Z653,0.0,E8881,0.0,P2832,0.0,G479,0.0,,1104905702,4,,,9999958595,1104905702,0,999877063,IL,601601605,1,4,0,28,1,11,27,10-Feb-2020,10-Feb-2020,,,,,0.0,0,0.0,0.0,,0,0.0,0.0,0.0,A,,,46,,,0,,,0.0,R1,,,1,2020
253459,-10000010261699,-10000930350869,O,71,2020-02-10,2020-02-10,14-Feb-2020,1,1,952,1,1711.39,0.0,,9999958595,A,1711.39,0,2139.23,1711.39,0.0,1,,T7432X,0,Z733,0,N182,0.0,R801,0.0,T50905,0.0,E1121,0.0,Z604,0.0,M5450,0.0,T7431X,0.0,Z653,0.0,E8881,0.0,P2832,0.0,G479,0.0,,1104905702,5,,,9999958595,1104905702,0,999877063,IL,601601605,1,4,0,28,1,11,27,10-Feb-2020,10-Feb-2020,,,,,61.89,0,61.89,0.0,,0,61.89,77.36,77.36,A,,,46,,,0,,,0.0,R1,,,1,2020
253460,-10000010261699,-10000930350869,O,71,2020-02-10,2020-02-10,14-Feb-2020,1,1,952,1,1711.39,0.0,,9999958595,A,1711.39,0,2139.23,1711.39,0.0,1,,T7432X,0,Z733,0,N182,0.0,R801,0.0,T50905,0.0,E1121,0.0,Z604,0.0,M5450,0.0,T7431X,0.0,Z653,0.0,E8881,0.0,P2832,0.0,G479,0.0,,1104905702,6,,,9999958595,1104905702,0,999877063,IL,601601605,1,4,0,28,1,11,27,10-Feb-2020,10-Feb-2020,,,,,61.89,0,61.89,0.0,,0,61.89,77.36,77.36,A,,,46,,T7432X,0,,,0.0,R1,,,1,2020
253461,-10000010261699,-10000930350869,O,71,2020-02-10,2020-02-10,14-Feb-2020,1,1,952,1,1711.39,0.0,,9999958595,A,1711.39,0,2139.23,1711.39,0.0,1,,T7432X,0,Z733,0,N182,0.0,R801,0.0,T50905,0.0,E1121,0.0,Z604,0.0,M5450,0.0,T7431X,0.0,Z653,0.0,E8881,0.0,P2832,0.0,G479,0.0,,1104905702,7,,,9999958595,1104905702,0,999877063,IL,601601605,1,4,0,28,1,11,27,10-Feb-2020,10-Feb-2020,,,,,0.0,0,0.0,0.0,,0,0.0,0.0,0.0,A,,,46,,,0,,,0.0,R1,,,1,2020
253462,-10000010261699,-10000930350869,O,71,2020-02-10,2020-02-10,14-Feb-2020,1,1,952,1,1711.39,0.0,,9999958595,A,1711.39,0,2139.23,1711.39,0.0,1,,T7432X,0,Z733,0,N182,0.0,R801,0.0,T50905,0.0,E1121,0.0,Z604,0.0,M5450,0.0,T7431X,0.0,Z653,0.0,E8881,0.0,P2832,0.0,G479,0.0,,1104905702,8,,,9999958595,1104905702,0,999877063,IL,601601605,1,4,0,28,1,11,27,10-Feb-2020,10-Feb-2020,G8839,,,M5B,396.92,0,396.92,0.0,,0,396.92,496.15,496.15,A,,,46,,,0,,,0.0,R1,,,1,2020
253463,-10000010261699,-10000930350869,O,71,2020-02-10,2020-02-10,14-Feb-2020,1,1,952,1,1711.39,0.0,,9999958595,A,1711.39,0,2139.23,1711.39,0.0,1,,T7432X,0,Z733,0,N182,0.0,R801,0.0,T50905,0.0,E1121,0.0,Z604,0.0,M5450,0.0,T7431X,0.0,Z653,0.0,E8881,0.0,P2832,0.0,G479,0.0,,1104905702,9,,,9999958595,1104905702,0,999877063,IL,601601605,1,4,0,28,1,11,27,10-Feb-2020,10-Feb-2020,,,,,61.89,0,61.89,0.0,,0,61.89,77.36,77.36,A,,,46,,,0,,,0.0,R1,,,1,2020
253464,-10000010261699,-10000930350869,O,71,2020-02-10,2020-02-10,14-Feb-2020,1,1,952,1,1711.39,0.0,,9999958595,A,1711.39,0,2139.23,1711.39,0.0,1,,T7432X,0,Z733,0,N182,0.0,R801,0.0,T50905,0.0,E1121,0.0,Z604,0.0,M5450,0.0,T7431X,0.0,Z653,0.0,E8881,0.0,P2832,0.0,G479,0.0,,1104905702,10,,,9999958595,1104905702,0,999877063,IL,601601605,1,4,0,28,1,11,27,10-Feb-2020,10-Feb-2020,,,,,61.89,0,61.89,0.0,,0,61.89,77.36,77.36,A,,,46,,,0,,,0.0,R1,,,1,2020


From perusal of the above claims, it appears that the performing NPI is the same on all claim lines.  

In [None]:
# prompt: For CLM_IDs with MAX of LINE_NUM >1, I want to know if any CLM_IDS have more than 1 value of PRF_PHYSN_NPI

# Highest LINE_NUM per claim
max_line_num_by_clm_id = professional.groupby('CLM_ID')['LINE_NUM'].agg('max')

# Claims that have more than one line
has_multiple_lines = max_line_num_by_clm_id.gt(1)
clm_ids_with_multiple_lines = max_line_num_by_clm_id.loc[has_multiple_lines].index

# Restrict the claim lines to those multi-line claims
on_multi_line_claim = professional['CLM_ID'].isin(clm_ids_with_multiple_lines)
filtered_professional = professional.loc[on_multi_line_claim]

# Number of lines for each (claim, performing NPI) pair
npi_counts = (
    filtered_professional
    .groupby(['CLM_ID', 'PRF_PHYSN_NPI'])['PRF_PHYSN_NPI']
    .count()
)

# Number of distinct performing NPIs per claim; keep the claims with more than one
npis_per_claim = npi_counts.groupby('CLM_ID').size()
clm_ids_with_multiple_npis = npis_per_claim[npis_per_claim > 1].index

# Print the CLM_IDs that meet the criteria
print("CLM_IDs with more than one PRF_PHYSN_NPI:")
clm_ids_with_multiple_npis

CLM_IDs with more than one PRF_PHYSN_NPI:


Index([], dtype='object', name='CLM_ID')

In [None]:
# checking and understanding AI generated code

# Step 1: highest LINE_NUM per claim
max_line_num_by_clm_id = professional.groupby('CLM_ID')['LINE_NUM'].agg('max')

# Step 2: IDs of the claims that have more than one line
has_multiple_lines = max_line_num_by_clm_id.gt(1)
clm_ids_with_multiple_lines = max_line_num_by_clm_id.loc[has_multiple_lines].index

# Step 3: claim lines that belong to those claims
on_multi_line_claim = professional['CLM_ID'].isin(clm_ids_with_multiple_lines)
filtered_professional = professional.loc[on_multi_line_claim]

# Step 4: line count for each (claim, performing NPI) combination
claim_npi_groups = filtered_professional.groupby(['CLM_ID', 'PRF_PHYSN_NPI'])
npi_counts = claim_npi_groups['PRF_PHYSN_NPI'].count()
npi_counts

Unnamed: 0_level_0,Unnamed: 1_level_0,PRF_PHYSN_NPI
CLM_ID,PRF_PHYSN_NPI,Unnamed: 2_level_1
-10000930037915,9999971093,11
-10000930037916,9999971093,13
-10000930037917,9999971093,14
-10000930037918,9999971093,14
-10000930037919,9999971093,23
...,...,...
-10000931486358,9999916593,15
-10000931486359,9999916593,15
-10000931486360,9999916593,9
-10000931486361,9999916593,11


In [None]:
# Number of distinct performing NPIs on each claim: npi_counts has one entry per
# (CLM_ID, PRF_PHYSN_NPI) pair, so the group size is the NPI count.
npi_counts.groupby(level='CLM_ID').size()

Unnamed: 0_level_0,PRF_PHYSN_NPI
CLM_ID,Unnamed: 1_level_1
-10000930037915,1
-10000930037916,1
-10000930037917,1
-10000930037918,1
-10000930037919,1
...,...
-10000931486358,1
-10000931486359,1
-10000931486360,1
-10000931486361,1


In [None]:
# prompt: the following line is hard to understand.
# clm_ids_with_multiple_npis = npi_counts.groupby('CLM_ID').size()[npi_counts.groupby('CLM_ID').size() > 1].index.  Break it up in two steps

# Step 1: how many (claim, NPI) entries each claim has
clm_id_sizes = npi_counts.groupby(level='CLM_ID').size()

# Step 2: keep the claims with more than one entry
has_multiple_npis = clm_id_sizes.gt(1)
clm_ids_with_multiple_npis = clm_id_sizes.loc[has_multiple_npis].index

# Print the CLM_IDs that meet the criteria
print("CLM_IDs with more than one PRF_PHYSN_NPI:")
clm_ids_with_multiple_npis

In [None]:
# keep only header information: retain the row with LINE_NUM 1 for each claim
print(len(professional))
is_first_line = professional['LINE_NUM'].eq(1)
professional = professional.loc[is_first_line]
print(len(professional))



901926
70159


In [None]:
# Diagnosis columns carried on the claim header: the principal code plus every
# ICD_DGNS_CD* column.
diagnosis_code_columns = ['PRNCPAL_DGNS_CD'] + [
    name for name in professional.columns if name.startswith('ICD_DGNS_CD')
]

diagnosis = (
    professional[['BENE_ID', 'YR'] + diagnosis_code_columns]
    # Wide to long: one row per (beneficiary, year, diagnosis column)
    .melt(id_vars=['BENE_ID', 'YR'],
          var_name='ICD_DIAG_COL',
          value_name='ICD_DIAG_CD')
    # Remove rows where ICD_DIAG_CD is not populated
    .loc[lambda long: long['ICD_DIAG_CD'].notna() & (long['ICD_DIAG_CD'] != '')]
    # The source column name is no longer needed
    .drop(columns=['ICD_DIAG_COL'])
    # Keep each code once per beneficiary and year
    .drop_duplicates()
)
print(len(diagnosis))



536883


In [None]:

# Attach the ICD description to every diagnosis row.
# The look-up is reduced to one row per code first: a code listed more than once
# would fan a single diagnosis row out into several and inflate later counts.
icd_lookup = LU_ICD_final.drop_duplicates(subset='ICD_DIAG_CD')

diagnosis = pd.merge(
    # Drop a description left by a previous run so the cell can be re-executed
    # without producing ICD_Description_x / ICD_Description_y columns.
    diagnosis.drop(columns=['ICD_Description'], errors='ignore'),
    icd_lookup,
    on='ICD_DIAG_CD',
    how='left',
    validate='many_to_one',  # raises if the look-up key is not unique
)


In [None]:
# Number of distinct diagnosis codes per beneficiary and year.
# Counting unique codes rather than rows keeps the figure correct even if the
# description merge repeated a row because of duplicate look-up entries.
num_diagnosis = (
    diagnosis
    .groupby(['BENE_ID', 'YR'])['ICD_DIAG_CD']
    .nunique()
    .reset_index(name='NUM_DIAG')
)

In [None]:
# Claim-header table: one row per distinct combination of the selected columns.
# This is the frame the export cell writes out as professional_office_claims_header.csv.
professional_office_claims_header = professional[['BENE_ID', 'CLM_ID', 'CLM_FROM_DT',
                            'CLM_THRU_DT', 'YR', 'PRNCPAL_DGNS_CD', 'CLM_PMT_AMT']].drop_duplicates()

# One row per ICD code, so the left merge below cannot multiply header rows.
icd_lookup = LU_ICD_final.drop_duplicates(subset='ICD_DIAG_CD')

# Encounter table: the header plus the description of the principal diagnosis.
professional_office_encounters = professional_office_claims_header.merge(
    icd_lookup,
    left_on='PRNCPAL_DGNS_CD',
    right_on='ICD_DIAG_CD',
    how='left',
    validate='many_to_one',  # raises if the look-up key is not unique
).drop(columns=['ICD_DIAG_CD'])

# A left merge against a unique key must preserve the number of header rows.
assert len(professional_office_encounters) == len(professional_office_claims_header)

# The merged result was originally assigned to this name; kept as an alias.
professional_office_claims_encounters = professional_office_encounters


In [None]:
# Inspect the office-encounter table built in the previous cell.
professional_office_encounters

Unnamed: 0,BENE_ID,CLM_ID,CLM_FROM_DT,CLM_THRU_DT,YR,PRNCPAL_DGNS_CD,CLM_PMT_AMT,ICD_Description
0,-10000010254618,-10000930037915,2015-09-28,2015-09-28,2015,R4689,932.69,Other symptoms and signs involving appearance ...
1,-10000010254618,-10000930037916,2016-10-03,2016-10-03,2016,R4689,745.12,Other symptoms and signs involving appearance ...
2,-10000010254618,-10000930037917,2017-10-09,2017-10-09,2017,R4689,1040.54,Other symptoms and signs involving appearance ...
3,-10000010254618,-10000930037918,2018-10-15,2018-10-15,2018,R4689,1131.18,Other symptoms and signs involving appearance ...
4,-10000010254618,-10000930037919,2019-10-21,2019-10-21,2019,R4689,1382.27,Other symptoms and signs involving appearance ...
...,...,...,...,...,...,...,...,...
71903,-10000010288007,-10000931486358,2018-06-12,2018-06-12,2018,Z608,1953.80,Other problems related to social environment
71904,-10000010288007,-10000931486359,2019-06-18,2019-06-18,2019,Z608,1677.17,Other problems related to social environment
71905,-10000010288007,-10000931486360,2020-06-23,2020-06-23,2020,Z608,273.51,Other problems related to social environment
71906,-10000010288007,-10000931486361,2021-06-29,2021-06-29,2021,Z608,1323.33,Other problems related to social environment




In [None]:

# Write both result tables to the Drive output folder, without the index column.
header_csv_path = '/content/drive/MyDrive/Data/Output Data/professional_office_claims_header.csv'
diagnosis_csv_path = '/content/drive/MyDrive/Data/Output Data/professional_office_diagnosis.csv'

professional_office_claims_header.to_csv(header_csv_path, index=False)
diagnosis.to_csv(diagnosis_csv_path, index=False)