In [1]:
import pandas as pd

names = pd.Series(['apples', 'oranges', 'kiwi'])
df = names.to_frame(name='Name')

# add new coulmn
df['Price'] = [50, 30, 80]
print(df)

      Name  Price
0   apples     50
1  oranges     30
2     kiwi     80


In [3]:
import pandas as pd

try:
    df = pd.read_csv('hospital_data.csv')
    print("\n Original Hospital DataFrame :\n")
    print(df)
    
# grouping by department
    grouped = df.groupby('Department')['Bill'].mean()
    print("\n Average Medical Cost by Department :\n")
    print(grouped)
except FileNotFoundError:
    print("Error: 'Hospital_data.csv' not found.")


 Original Hospital DataFrame :

  Patient_ID    Name   Age  Department Admission_Date      Bill
0       P001   Vijay  34.0      Carona     2025-01-15    7000.8
1       P002    Ajay  30.0      Cancer     2025-02-10   76000.0
2       P003   Kiran   NaN   Neurology     2025-03-03   10000.0
3       P004   Amith  60.0  Cardiology     2025-01-22  200000.0
4       P005  Suresh  25.0      Cancer     2025-04-06   80000.0
5       P006     NaN  50.0         ICU     2025-08-04       NaN

 Average Medical Cost by Department :

Department
Cancer         78000.0
Cardiology    200000.0
Carona          7000.8
ICU                NaN
Neurology      10000.0
Name: Bill, dtype: float64


In [13]:
import pandas as pd

pd.set_option('display.width', 1000)
pd.set_option('display.max_columns', None)

try:
    df = pd.read_csv('hospital_data.csv')
    print("\n Original Hospital DataFrame :\n")
    print(df)

    # add discount_cost column (10% discount)
    df['Discount'] = df['Bill'] - df['Bill']*0.9
    df['Total Amount'] = df['Bill']*0.9

    # sorting by bill
    sorted_df = df.sort_values('Bill', ascending=False)
    print("\n Sorted by Medical Bills (descending order) :\n")
    print(sorted_df)

except FileNotFoundError:
    print("Error: 'Hospital_data.csv' not found.")


 Original Hospital DataFrame :

  Patient_ID    Name   Age  Department Admission_Date      Bill
0       P001   Vijay  34.0      Carona     2025-01-15    7000.8
1       P002    Ajay  30.0      Cancer     2025-02-10   76000.0
2       P003   Kiran   NaN   Neurology     2025-03-03   10000.0
3       P004   Amith  60.0  Cardiology     2025-01-22  200000.0
4       P005  Suresh  25.0      Cancer     2025-04-06   80000.0
5       P006     NaN  50.0         ICU     2025-08-04       NaN

 Sorted by Medical Bills (descending order) :

  Patient_ID    Name   Age  Department Admission_Date      Bill  Discount  Total Amount
3       P004   Amith  60.0  Cardiology     2025-01-22  200000.0  20000.00     180000.00
4       P005  Suresh  25.0      Cancer     2025-04-06   80000.0   8000.00      72000.00
1       P002    Ajay  30.0      Cancer     2025-02-10   76000.0   7600.00      68400.00
2       P003   Kiran   NaN   Neurology     2025-03-03   10000.0   1000.00       9000.00
0       P001   Vijay  34.0     

In [15]:
import pandas as pd

try:
    df = pd.read_csv('hospital_data.csv')
    print("\n Original Hospital DataFrame :\n")
    print(df)

# add a status column based on age
    df['Status'] = df['Age'].apply(lambda x: 'Senior' if x>=50 else 'Adult' if x>=10 else 'unknown')
    print("\n Data Frame with status column :\n")
    print(df)

# saving to csv
    df.to_csv('hospital_data_updated.csv', index=False)
    print("\n Modified Dataframe saved to 'hospital_data_updated.csv'.")
except FileNotFoundError:
    print("Error: 'Hospital_data.csv' not found.")


 Original Hospital DataFrame :

  Patient_ID    Name   Age  Department Admission_Date      Bill
0       P001   Vijay  34.0      Carona     2025-01-15    7000.8
1       P002    Ajay  30.0      Cancer     2025-02-10   76000.0
2       P003   Kiran   NaN   Neurology     2025-03-03   10000.0
3       P004   Amith  60.0  Cardiology     2025-01-22  200000.0
4       P005  Suresh  25.0      Cancer     2025-04-06   80000.0
5       P006     NaN  50.0         ICU     2025-08-04       NaN

 Data Frame with status column :

  Patient_ID    Name   Age  Department Admission_Date      Bill   Status
0       P001   Vijay  34.0      Carona     2025-01-15    7000.8    Adult
1       P002    Ajay  30.0      Cancer     2025-02-10   76000.0    Adult
2       P003   Kiran   NaN   Neurology     2025-03-03   10000.0  unknown
3       P004   Amith  60.0  Cardiology     2025-01-22  200000.0   Senior
4       P005  Suresh  25.0      Cancer     2025-04-06   80000.0    Adult
5       P006     NaN  50.0         ICU     202

In [17]:
import pandas as pd

try:
    df = pd.read_csv('hospital_data.csv')
    series = df['Bill']
    print("\n Original Hospital Bill :\n")
    print(series)

# user manual input
    print("\n Enter Patient_ID to update : ",end=" ")
    patient_id = input().strip()
    print(f"Enter new bill for {patient_id} : ",end=" ")
    new_cost = float(input())

# update bill series and save
    if patient_id in df['Patient_ID'].values:
        index = df[df['Patient_ID'] == patient_id].index[0]
        series[index] = new_cost
        print("\n Updated Medical Bill Series :\n")
        print(series)
        df['Bill'] = series
        df.to_csv('hospital_data.csv', index=False)
        print("Updated csv saved to 'hospital_data.csv'.")
    else:
        print (f"Error: patient_id {patient_id} not found. ")
except FileNotFoundError:
    print("Error: 'hospital_data.csv' not found.")


 Original Hospital Bill :

0      7000.8
1     76000.0
2     10000.0
3    200000.0
4     80000.0
5         NaN
Name: Bill, dtype: float64

 Enter Patient_ID to update :  

 P003


Enter new bill for P003 :  

 15000.55



 Updated Medical Bill Series :

0      7000.80
1     76000.00
2     15000.55
3    200000.00
4     80000.00
5          NaN
Name: Bill, dtype: float64
Updated csv saved to 'hospital_data.csv'.


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  series[index] = new_cost


In [None]:
Data Cleaning :

1. Handling missing data (NaN)
isna()- identity missing value
fillna()-specific value
dropna()- delete missing value

2. remove duplicate values
duplicated()- checks duplicates
drop_duplicates()

3. correcting invalid data
12/12/20255 dates, ages, cost(-ve)
clip()

4. standardizing / normalizing string data:
str.lower(), str.upper(), str.strip(), str.replace()

5. data convertion
pd.to_numeric()
pd.to_datetime()
srrors='coerce

In [15]:
import pandas as pd

try:
    df = pd.read_csv('hospital_data.csv')
    series = df['Name']
    print("\n original Name Series :\n")
    print(series)
    clean_series = series.str.title().str.strip()
    print("\nName series after standardizing with (titlecase, strripped space) :\n")
    print(clean_series)
    
#saving to csv dataframe
    df['Name'] = clean_series
    df.to_csv('hospital_data.csv', index=False)
    print("\nData saved to csv file")
except FileNotFoundError:
    print("Error: 'hospital_data.csv' not found.")


 original Name Series :

0     vijay
1      ajay
2     kiran
3     amith
4    suresh
5       NaN
Name: Name, dtype: object

Name series after standardizing with (titlecase, strripped space) :

0     Vijay
1      Ajay
2     Kiran
3     Amith
4    Suresh
5       NaN
Name: Name, dtype: object

Data saved to csv file


In [50]:
import pandas as pd
import numpy as np

try:
    df = pd.read_csv('hospital_data.csv')
    series = df['Age']
    print("\n Original age series :\n")
    print(series)
    
# replace invalid ages <0 or >120 with NaN
    clean_series = series.where((series>=0) & (series<=120), np.nan)
    print("\n Age series after replacing invalid ages with NaN :\n")
    print(clean_series)
    df['Age'] = clean_series
    df.to_csv('hospital_data.csv', index=False)
    print("\n updated csv saved to 'hospital_data.csv'.")
except FileNotFoundError:
    print("Error: 'hospital_data.csv' not found.")


 Original age series :

0     NaN
1     NaN
2     NaN
3    60.0
4    25.0
5     NaN
Name: Age, dtype: float64

 Age series after replacing invalid ages with NaN :

0     NaN
1     NaN
2     NaN
3    60.0
4    25.0
5     NaN
Name: Age, dtype: float64

 updated csv saved to 'hospital_data.csv'.


In [58]:
import pandas as pd

try:
    df = pd.read_csv('hospital_data.csv')
    series = df['Admission_Date']
    print("\n Original Admission_Date series :\n")
    print(series)
    
# convert string to datetime
    date_series = pd.to_datetime(series, format='%Y-%m-%d')
    print("\n Admission Date series after converting to datetime : \n")
    print(date_series)
    
# update and save the dataframe
    df['Admission_Date' ]= date_series
    df.to_csv('hospital_data.csv', index=False)
    print("\n Updated CSV saved to 'hospital_data.csv'")
except FileNotFoundError:
    print("Error: 'hospital_data.csv' not found.")


 Original Admission_Date series :

0    2025-01-15
1    2025-02-10
2    2025-03-03
3    2025-01-22
4    2025-04-06
5    2025-08-04
Name: Admission_Date, dtype: object

 Admission Date series after converting to datetime : 

0   2025-01-15
1   2025-02-10
2   2025-03-03
3   2025-01-22
4   2025-04-06
5   2025-08-04
Name: Admission_Date, dtype: datetime64[ns]

 Updated CSV saved to 'hospital_data.csv'


In [62]:
import pandas as pd

try:
    df = pd.read_csv('hospital_data.csv')
    series= df['Admission_Date']
    print("\n Original Admission_Date series")
    print(series)
    print("\n Convert Admission dateformat from y/m/d to d/m/y:")
    confirm = input().strip().lower()
    if confirm == 'yes':
        date_series= pd.to_datetime(series, format='%Y-%m-%d')
        f_series= date_series.dt.strftime('%d-%m-%Y')
        print("\n Admission date series after convertion to DD-MM-YYYY:")
        print(f_series)
# update and save the dataframe
        df['Admission_Date' ]= f_series
        df.to_csv('hospital_data.csv', index=False)
        print("\n Updated CSV saved to 'hospital_data.csv'")
    else:
        print("\n Updated format conversion cancelled.")
except FileNotFoundError:
    print("Error: 'hospital_data.csv' not found.")


 Original Admission_Date series
0    2025-01-15
1    2025-02-10
2    2025-03-03
3    2025-01-22
4    2025-04-06
5    2025-08-04
Name: Admission_Date, dtype: object

 Convert Admission dateformat from y/m/d to d/m/y:


 yes



 Admission date series after convertion to DD-MM-YYYY:
0    15-01-2025
1    10-02-2025
2    03-03-2025
3    22-01-2025
4    06-04-2025
5    04-08-2025
Name: Admission_Date, dtype: object

 Updated CSV saved to 'hospital_data.csv'
