In [1]:
import pandas as pd
import os


# Default input file for the loader below. It is a bare filename, so it is
# resolved relative to the current working directory of the kernel/script.
csv_filename = "vitv_feedback.csv"

def load_and_preprocess_data(file_path):
    """Load a feedback CSV and print a short data-quality report.

    The function reads ``file_path`` with pandas, prints the first five rows
    and the record count, and reports how many values are missing in the
    ``latitude``, ``longitude`` and ``feedback_text`` columns.

    Args:
        file_path: Path to the CSV file to load.

    Returns:
        The loaded ``pandas.DataFrame``, or ``None`` when the file does not
        exist (or is not a regular file), cannot be parsed as CSV, or lacks
        one of the required columns. Every failure is reported via ``print``
        rather than by raising, so callers only need an ``is not None`` check.
    """
    required_columns = ['latitude', 'longitude', 'feedback_text']

    # isfile() rather than exists(): a directory path "exists" but would make
    # read_csv raise instead of producing the friendly message below.
    if not os.path.isfile(file_path):
        print(f"Error: The file '{file_path}' was not found.")
        print("Please ensure the CSV file is in the same directory as the script.")
        return None

    print(f"Loading data from '{file_path}'...")

    try:
        df = pd.read_csv(file_path)
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        # Keep the "print and return None" error contract for unreadable files.
        print(f"Error: Could not parse '{file_path}' as CSV: {exc}")
        return None

    # Selecting absent columns would raise KeyError in the null check further
    # down, so verify the schema up front and report it like other failures.
    absent_columns = [col for col in required_columns if col not in df.columns]
    if absent_columns:
        print(f"Error: Required column(s) missing from '{file_path}': {absent_columns}")
        return None

    print("\n--- Data Verification ---")
    print("Successfully loaded the dataset. Here are the first 5 rows:")

    print(df.head())

    print(f"\nTotal records found: {len(df)}")

    print("\nChecking for missing data in key columns...")
    missing_data = df[required_columns].isnull().sum()

    if missing_data.sum() == 0:
        print("✅ No missing geotags or feedback text. The dataset is ready!")
    else:
        print("⚠️ Warning: Missing data found:")
        # Show only the columns that actually contain nulls.
        print(missing_data[missing_data > 0])

    return df


if __name__ == "__main__":
    # Driver for the loading stage: announce it, run the loader on the
    # configured CSV, and confirm success only when a DataFrame came back.
    print("--- Phase 2: Data Acquisition & Pre-processing ---")

    feedback_data = load_and_preprocess_data(csv_filename)

    # The loader signals failure by returning None (after printing why).
    if not (feedback_data is None):
        print("\nPhase 2 completed successfully. The data is loaded and verified.")

--- Phase 2: Data Acquisition & Pre-processing ---
Loading data from 'vitv_feedback.csv'...

--- Data Verification ---
Successfully loaded the dataset. Here are the first 5 rows:
          location_name  latitude  longitude  \
0  Silver Jubilee Tower   12.9713    79.1575   
1                Foodys   12.9701    79.1578   
2      Technology Tower   12.9720    79.1593   
3       Anna Auditorium   12.9693    79.1561   
4       G D Naidu Block   12.9729    79.1610   

                                       feedback_text  
0  The library in SJT is amazing, so quiet and ha...  
1  The service at Foodys is too slow during peak ...  
2  Wi-Fi in TT is unreliable. It disconnects freq...  
3  Attended a great event at Anna Auditorium. The...  
4  The classrooms in GDN are well-maintained and ...  

Total records found: 8

Checking for missing data in key columns...
✅ No missing geotags or feedback text. The dataset is ready!

Phase 2 completed successfully. The data is loaded and verified.


Collecting vader
  Downloading vader-0.0.3-py3-none-any.whl.metadata (2.3 kB)
Collecting sonopy (from vader)
  Downloading sonopy-0.1.2.tar.gz (3.3 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Downloading vader-0.0.3-py3-none-any.whl (4.8 MB)
   ---------------------------------------- 0.0/4.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/4.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/4.8 MB 262.6 kB/s eta 0:00:19
   ---------------------------------------- 0.0/4.8 MB 262.6 kB/s eta 0:00:19
   ---------------------------------------- 0.0/4.8 MB 262.6 kB/s eta 0:00:19
    --------------------------------------- 0.1/4.8 MB 218.8 kB/s eta 0:00:22
    --------------------------------------- 0.1/4.8 MB 218.8 kB/s eta 0:00:22
    --------------------------------------- 0.1/4.8 MB 196.9 kB/s eta 0:00:24
    --------------------------------------- 0.1/4.8 MB 196.9 kB/s eta 0:00:24
    --------