<a href="https://colab.research.google.com/github/sandesarikeerthi/2303a51635-batch-23/blob/main/lab04.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np

def process_course_feedback(file_path='course_feedback.csv'):
    """Report and return well-rated feedback for 'Python for Data Science'.

    Reads the CSV at ``file_path``, prints a summary of the raw data, drops
    rows whose 'Rating' is missing, and keeps only the rows for the course
    'Python for Data Science' with a rating of 4 or more.

    Args:
        file_path: Path of the CSV file to read. The code accesses the
            'Rating', 'Course_Name', 'User_ID' and 'Feedback' columns.

    Returns:
        A DataFrame copy holding the matching rows (possibly empty), or
        ``None`` when the file does not exist.
    """
    divider = "\n" + "=" * 50 + "\n"
    target_course = 'Python for Data Science'

    try:
        raw = pd.read_csv(file_path)
    except FileNotFoundError:
        print(f"Error: The file '{file_path}' was not found.")
        return None

    # Overview of the data exactly as it was loaded.
    print("--- Original DataFrame Info ---")
    raw.info()
    print("\nOriginal DataFrame Head:")
    print(raw.head())
    print(divider)

    # Rows without a rating cannot be compared against the threshold.
    rated = raw.dropna(subset=['Rating'])
    print("--- DataFrame after removing rows with missing 'Rating' ---")
    print(rated.head())
    print(f"\nNumber of rows after removing NaNs: {len(rated)}")
    print(divider)

    is_target_course = rated['Course_Name'] == target_course
    is_well_rated = rated['Rating'] >= 4
    # .copy() detaches the result from `rated` so callers can modify it freely.
    meaningful_feedback = rated[is_target_course & is_well_rated].copy()

    if meaningful_feedback.empty:
        print("No meaningful feedback found for 'Python for Data Science' with a rating of 4 or higher.")
    else:
        print("--- Meaningful Feedback for 'Python for Data Science' (Rating >= 4) ---")
        print(meaningful_feedback[['User_ID', 'Feedback']])

    return meaningful_feedback

if __name__ == '__main__':
    # Demo records, one tuple per feedback entry; u103 deliberately has no
    # rating so the missing-value handling is exercised.
    sample_columns = ['User_ID', 'Course_Name', 'Rating', 'Feedback', 'Date']
    sample_rows = [
        ('u101', 'Python for Data Science', 5, 'Excellent course, very clear and practical!', '2023-01-15'),
        ('u102', 'Machine Learning', 3, 'Could improve the pace.', '2023-02-20'),
        ('u103', 'Python for Data Science', np.nan, 'The rating is missing for this one.', '2023-03-10'),
        ('u104', 'Web Development', 4, 'Good beginner course.', '2023-04-05'),
        ('u105', 'Python for Data Science', 4, 'Very helpful, especially the pandas section.', '2023-05-18'),
        ('u106', 'Python for Data Science', 2, 'Not what I expected.', '2023-06-01'),
        ('u107', 'Machine Learning', 5, 'Comprehensive and well-explained.', '2023-06-25'),
    ]
    # Transpose the rows into the column -> list-of-values mapping.
    sample_data = {
        column: list(values)
        for column, values in zip(sample_columns, zip(*sample_rows))
    }

    # Persist the demo data so the function can read it from its default path.
    sample_df = pd.DataFrame(sample_data)
    sample_df.to_csv('course_feedback.csv', index=False)

    filtered_df = process_course_feedback()

    if filtered_df is not None:
        print(f"\n{'=' * 50}\n")
        print("--- Final Filtered DataFrame (returned from function) ---")
        print(filtered_df)

--- Original DataFrame Info ---
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7 entries, 0 to 6
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   User_ID      7 non-null      object 
 1   Course_Name  7 non-null      object 
 2   Rating       6 non-null      float64
 3   Feedback     7 non-null      object 
 4   Date         7 non-null      object 
dtypes: float64(1), object(4)
memory usage: 412.0+ bytes

Original DataFrame Head:
  User_ID              Course_Name  Rating  \
0    u101  Python for Data Science     5.0   
1    u102         Machine Learning     3.0   
2    u103  Python for Data Science     NaN   
3    u104          Web Development     4.0   
4    u105  Python for Data Science     4.0   

                                       Feedback        Date  
0   Excellent course, very clear and practical!  2023-01-15  
1                       Could improve the pace.  2023-02-20  
2           The rating is mi

In [2]:
import pandas as pd
import numpy as np

def process_air_quality_data(file_path='air_quality.csv'):
    """Forward-fill gaps in air-quality readings and return the polluted days.

    Reads the CSV at ``file_path``, replaces each missing value with the last
    valid value above it in the same column, and keeps the rows where
    PM2.5 exceeds 150 and NO2 exceeds 80.

    Args:
        file_path: Path of the CSV file to read. The code accesses the
            'PM2.5' and 'NO2' columns.

    Returns:
        A DataFrame with the rows that satisfy both thresholds (possibly
        empty), or ``None`` when the file does not exist.
    """
    try:
        df = pd.read_csv(file_path)
    except FileNotFoundError:
        print(f"Error: The file '{file_path}' was not found.")
        return None

    # DataFrame.ffill() replaces fillna(method='ffill'), whose `method`
    # keyword is deprecated and emits a FutureWarning on recent pandas.
    # NOTE: the fill runs down the whole frame in row order, so a gap is
    # filled from the previous row regardless of its 'Location'.
    df_filled = df.ffill()
    print("--- DataFrame after forward filling missing values ---")
    print(df_filled)
    print("\n" + "="*50 + "\n")

    polluted_days = df_filled[
        (df_filled['PM2.5'] > 150) &
        (df_filled['NO2'] > 80)
    ]

    if not polluted_days.empty:
        print("--- Filtered records for polluted days ---")
        print(polluted_days)
    else:
        print("No days found that meet the polluted criteria (PM2.5 > 150 and NO2 > 80).")

    return polluted_days

if __name__ == '__main__':
    # Demo readings, one tuple per day; the third record has no PM2.5 value
    # so the forward fill has something to fill.
    sample_columns = ['Location', 'PM2.5', 'PM10', 'NO2', 'SO2', 'Date']
    sample_rows = [
        ('City A', 160, 200, 85, 50, '2023-01-01'),
        ('City A', 145, 170, 75, 45, '2023-01-02'),
        ('City B', np.nan, 190, 90, 40, '2023-01-03'),
        ('City A', 180, 220, 82, 55, '2023-01-04'),
        ('City B', 100, 150, 60, 30, '2023-01-05'),
    ]
    # Transpose the rows into the column -> list-of-values mapping.
    sample_data = {
        column: list(values)
        for column, values in zip(sample_columns, zip(*sample_rows))
    }

    # Persist the demo data so the function can read it from its default path.
    sample_df = pd.DataFrame(sample_data)
    sample_df.to_csv('air_quality.csv', index=False)

    polluted_df = process_air_quality_data()

    if polluted_df is not None:
        print(f"\n{'=' * 50}\n")
        print("--- Final Filtered DataFrame (returned from function) ---")
        print(polluted_df)


--- DataFrame after forward filling missing values ---
  Location  PM2.5  PM10  NO2  SO2        Date
0   City A  160.0   200   85   50  2023-01-01
1   City A  145.0   170   75   45  2023-01-02
2   City B  145.0   190   90   40  2023-01-03
3   City A  180.0   220   82   55  2023-01-04
4   City B  100.0   150   60   30  2023-01-05


--- Filtered records for polluted days ---
  Location  PM2.5  PM10  NO2  SO2        Date
0   City A  160.0   200   85   50  2023-01-01
3   City A  180.0   220   82   55  2023-01-04


--- Final Filtered DataFrame (returned from function) ---
  Location  PM2.5  PM10  NO2  SO2        Date
0   City A  160.0   200   85   50  2023-01-01
3   City A  180.0   220   82   55  2023-01-04


  df_filled = df.fillna(method='ffill')
