In [18]:
import pandas as pd
import os

In [19]:
# Tags to process; each one names a "<tag>_msv_stitched_30day.csv" input file
tags = [
    "flu",
    "cough",
    "fever",
    "headache",
    "lagnat",
    "rashes",
    "sipon",
    "ubo",
    "ecq",
    "face-shield",
    "Frontliners",
    "masks",
    "Quarantine",
    "social-distancing",
    "work-from-home",
]

In [20]:
def load_and_prepare_tag_data(tag):
    """Load one tag's stitched CSV and reduce it to a mergeable two-column frame.

    Reads ``{tag}_msv_stitched_30day.csv`` from the current working directory,
    normalises the ``date`` column to ``YYYY-MM-DD`` strings, and keeps only
    ``date`` plus the tag's value column, so that merging several tags on
    ``date`` cannot collide on stray extra columns.

    Parameters
    ----------
    tag : str
        Tag name as used in the file name (may contain dashes,
        e.g. ``"face-shield"``).

    Returns
    -------
    pandas.DataFrame or None
        Frame with columns ``['date', <tag with dashes replaced by spaces>]``,
        or ``None`` (after printing a message) when the file is missing,
        cannot be read, or lacks the expected columns.
    """
    filename = f"{tag}_msv_stitched_30day.csv"
    if not os.path.exists(filename):
        print(f"Warning: Could not find {filename}")
        return None

    # Name of the value column in the result: the tag with spaces instead of dashes
    tag_column = tag.replace('-', ' ')

    try:
        df = pd.read_csv(filename)

        # Accept a value column that is still spelled with dashes and normalise it
        if tag_column not in df.columns and tag in df.columns:
            df = df.rename(columns={tag: tag_column})

        # Fail with a clear message rather than an opaque KeyError further down
        missing = [col for col in ('date', tag_column) if col not in df.columns]
        if missing:
            raise ValueError(f"missing expected column(s): {missing}")

        # Ensure the date column is uniformly formatted so frames join cleanly
        df['date'] = pd.to_datetime(df['date']).dt.strftime('%Y-%m-%d')

        # Keep only the date and value columns
        return df[['date', tag_column]]
    except Exception as e:
        # Best-effort loader: report the problem and let the caller skip this tag
        print(f"Error processing {filename}: {str(e)}")
        return None

In [21]:
# Seed the merged frame from the first tag in the list; the remaining tags
# are joined onto it afterwards. The result is None if that file fails to load.
first_tag = tags[0]
merged_df = load_and_prepare_tag_data(tag=first_tag)

In [22]:
# Fold every remaining tag into the merged frame with an outer join on 'date'.
# If the first tag could not be loaded (merged_df is None), the first tag that
# does load seeds the merge instead of the whole run being abandoned.
for tag in tags[1:]:
    tag_df = load_and_prepare_tag_data(tag)
    if tag_df is None:
        continue  # the loader has already printed why this tag was skipped
    if merged_df is None:
        merged_df = tag_df
    else:
        # Outer join keeps dates that appear in only some of the tag files
        merged_df = pd.merge(merged_df, tag_df, on='date', how='outer')

if merged_df is not None:
    # Sort by date
    merged_df = merged_df.sort_values('date')

    # Save the merged file
    output_filename = "3_gt_msv_stitched_compute.csv"
    merged_df.to_csv(output_filename, index=False)
    print(f"\nSuccessfully created {output_filename}")
    print(f"Final shape: {merged_df.shape}")

    # Display first few rows to verify
    print("\nFirst few rows of the merged file:")
    print(merged_df.head())
else:
    print("Error: Could not process any tag file")


Successfully created 3_gt_msv_stitched_compute.csv
Final shape: (364, 16)

First few rows of the merged file:
         date    flu  cough  fever  headache  lagnat  rashes  sipon   ubo  \
0  2020-03-16  100.0   96.0   97.0      79.0    50.0    66.0  100.0  91.0   
1  2020-03-17   50.0   82.0   94.0      85.0    71.0    86.0   96.0  76.0   
2  2020-03-18   38.0   87.0   94.0      70.0    79.0    37.0   63.0  72.0   
3  2020-03-19   38.0   75.0   90.0      58.0    50.0    58.0   64.0  76.0   
4  2020-03-20   35.0   71.0   95.0      55.0    51.0    64.0   66.0  92.0   

   ecq  face shield  Frontliners  masks  Quarantine  social distancing  \
0  0.0          0.0         14.0   53.0       100.0              100.0   
1  0.0          0.0         24.0   46.0        46.0               64.0   
2  5.0         57.0         33.0   46.0        38.0               59.0   
3  0.0         53.0         19.0   69.0        36.0               50.0   
4  5.0         42.0         26.0   38.0        39.0     