# Data Loading and Merging

In [14]:
import os
from pathlib import Path

import pandas as pd

### Defining base directory

In [15]:
# Root folder that holds every raw dataset used by this notebook.
# Set the OWML_DATA_DIR environment variable to point the notebook at a
# different copy of the data; when it is unset (or empty) the original local
# path below is used unchanged, so existing runs behave exactly as before.
BASE_DIR = Path(
    os.environ.get("OWML_DATA_DIR")
    or "/Users/roshni/Desktop/DrParmar_Project/owml_project/data"
).expanduser()  # expanduser() lets an override such as "~/data" work

# One sub-folder per dataset, all resolved relative to BASE_DIR.
FAKENEWSNET_DIR = BASE_DIR / "FakeNewsNet"
WELFAKE_DIR     = BASE_DIR / "WELFake"
COAID_DIR       = BASE_DIR / "CoAID"

### Loading FakeNewsNet dataset

In [16]:
print("\nLoading FakeNewsNet dataset...")

# The PolitiFact articles are read from two files: one with real stories,
# one with fake stories.
real_path_fnn = FAKENEWSNET_DIR / "PolitiFact_real_news_content.csv"
fake_path_fnn = FAKENEWSNET_DIR / "PolitiFact_fake_news_content.csv"

# Label convention used for this dataset: 1 = real, 0 = fake.
# The article body column 'text' is renamed to 'content'.
fnn_real = (
    pd.read_csv(real_path_fnn)
    .assign(label=1)
    .rename(columns={"text": "content"})
)
fnn_fake = (
    pd.read_csv(fake_path_fnn)
    .assign(label=0)
    .rename(columns={"text": "content"})
)

# Stack real on top of fake with a fresh 0..n-1 index, then tag every row with
# the dataset it came from. All other columns from the CSVs are kept as-is.
# NOTE(review): if the raw CSVs already carry a 'source' column, its values
# are replaced by the constant below - confirm that is intended.
fnn_df = (
    pd.concat([fnn_real, fnn_fake], ignore_index=True)
    .assign(source="FakeNewsNet")
)

print(f"FakeNewsNet loaded: {fnn_df.shape[0]} records")




Loading FakeNewsNet dataset...
FakeNewsNet loaded: 240 records


### Loading WELFake dataset

In [None]:
print("Loading WELFake dataset...")
wel_path = WELFAKE_DIR / "WELFake.csv"

# Read the file, rename the article body column 'text' to 'content', keep only
# the two columns used here, and tag each row with its originating dataset.
# The 'label' column is taken from the file unchanged.
wel_df = (
    pd.read_csv(wel_path)
    .rename(columns={"text": "content"})
    .loc[:, ["content", "label"]]
    .assign(source="WELFake")
)

print(f"WELFake loaded: {wel_df.shape[0]} records")

Loading WELFake dataset...
WELFake loaded: 72134 records


### Loading CoAID dataset

In [None]:
print("\nLoading CoAID dataset...")
coaid_path = COAID_DIR / "CoAID_News_Combined.csv"
coaid_df = pd.read_csv(coaid_path)

# The article text is looked up under 'news' first and 'text' second;
# whichever is found is renamed to 'content'. If neither is present the frame
# is left untouched (it may already have a 'content' column).
text_col = next((col for col in ("news", "text") if col in coaid_df.columns), None)
if text_col is not None:
    coaid_df = coaid_df.rename(columns={text_col: "content"})

# Fail early with an actionable message instead of the bare KeyError that the
# column selection below would otherwise raise on an unexpected schema.
missing_cols = [col for col in ("content", "label") if col not in coaid_df.columns]
if missing_cols:
    raise KeyError(
        f"{coaid_path} is missing required column(s) {missing_cols}; "
        f"columns found: {list(coaid_df.columns)}"
    )

# Keep only the text and label columns and tag each row with its dataset.
# NOTE(review): label values are used exactly as stored in the file - confirm
# they follow the same real/fake encoding as the other datasets before merging.
coaid_df = coaid_df[["content", "label"]].assign(source="CoAID")

print(f"CoAID loaded: {coaid_df.shape[0]} records")


Loading CoAID dataset...
CoAID loaded: 5457 records
