# Exploratory Data Analysis (EDA) for ASAP-Review Dataset

In [3]:
# Libraries
import pandas as pd

# Load the ASAP-Review data. When the CSV is not available, fall back to a
# two-row synthetic sample so the remaining cells can still be executed.
try:
    df = pd.read_csv('asap_reviews.csv')
except FileNotFoundError:
    print("Dataset not found. Creating a dummy DataFrame for demonstration.")
    # One tuple per review: text followed by the binary labels, in column order.
    demo_rows = [
        (
            "This paper proposes a novel method. It is motivated by previous failures. The main claim is strong. Experiments support the claim.",
            1, 1, 1, 1, 1,
        ),
        (
            "A significant contribution. Minor motivation. Weak claims. No experimental support.",
            1, 1, 1, 0, 0,
        ),
    ]
    df = pd.DataFrame(
        demo_rows,
        columns=['review_text', 'Contribution', 'Motivation', 'Claims', 'Support', 'acceptance'],
    )

# Preview the first rows (rendered as the cell output in a notebook).
df.head()


ModuleNotFoundError: No module named 'pandas'

## Descriptive Statistics

In [None]:
# Number of reviews
print("Number of reviews:", len(df))

# Average review length in whitespace-separated words.
# The vectorized .str accessor propagates missing values (NaN) instead of
# raising on them, and .mean() skips NaN, so rows without review text are
# left out of the average rather than crashing the cell.
avg_len = df['review_text'].str.split().str.len().mean()
print("Average review length (words):", avg_len)

# Aspect rate: percentage of reviews in which each aspect label equals 1
aspects = ['Contribution', 'Motivation', 'Claims', 'Support']
for aspect in aspects:
    rate = (df[aspect] == 1).mean() * 100
    print(f"{aspect} presence rate: {rate:.2f}%")

# Acceptance rate: percentage of reviews whose 'acceptance' label equals 1
acceptance_rate = (df['acceptance'] == 1).mean() * 100
print("Acceptance rate:", acceptance_rate)


## Profiling Results

In [None]:

# Acceptance rate (in percent) among reviews where both the Contribution and
# the Motivation labels equal 1.
both_contrib_motiv = df[(df['Contribution'] == 1) & (df['Motivation'] == 1)]
if not both_contrib_motiv.empty:
    print("Acceptance rate when Contribution and Motivation are both present:", (both_contrib_motiv['acceptance'] == 1).mean() * 100)
else:
    # The mean of an empty selection is NaN; report the empty case explicitly,
    # the same way the Claims-only subset below does.
    print("No reviews with both Contribution and Motivation present in this dataset.")

# only claims: Claims == 1 while Contribution and Motivation are both 0
# NOTE(review): 'Support' is not constrained here, so reviews with
# Support == 1 are still counted as "only Claims" -- confirm this is intended.
only_claims = df[(df['Claims'] == 1) & (df['Contribution'] == 0) & (df['Motivation'] == 0)]
if not only_claims.empty:
    print("Acceptance rate when only Claims are present:", (only_claims['acceptance'] == 1).mean() * 100)
else:
    print("No reviews with only Claims present in this dataset.")
