# RQ4: How frequently do developers use the conversion functions?


In [4]:
import pandas as pd
from pathlib import Path
# Read the SFConvertBot pull-request metadata, then swap every missing
# value (NaN) for None so later cells can rely on plain truthiness checks.
df = pd.read_csv(Path('../../data/sfconvertbot_pr_metadata.csv')).pipe(
    lambda frame: frame.where(frame.notna(), None)
)

In [15]:
from tqdm import tqdm
import json
all_events = set()

# Extract the status-change history of every PR from its discussion metadata.
# Per-row results are collected first and applied to the DataFrame once after
# the loop, so the frame is never mutated while it is being iterated.
keep_row = []        # one bool per row of df: True when the metadata parsed
status_column = []   # one ";"-joined string per kept row

for raw_metadata in tqdm(df['discussion_metadata'], total=len(df)):
    # Missing values (None / NaN / the literal 'None') and anything that does
    # not look like a JSON object are excluded from the analysis.
    if not isinstance(raw_metadata, str) or not raw_metadata.startswith('{'):
        keep_row.append(False)
        continue

    try:
        discussion_metadata = json.loads(raw_metadata)
    except json.JSONDecodeError:
        # Starts with '{' but is not valid JSON (e.g. truncated): exclude too
        keep_row.append(False)
        continue

    events = discussion_metadata['discussion']['events']

    # Statuses of this PR's 'status-change' events, in event order
    status_changes = []
    for event in events:
        event_type = event['type']
        all_events.add(event_type)
        if event_type == 'status-change':
            status_changes.append(event['data']['status'])

    keep_row.append(True)
    status_column.append(";".join(status_changes))

# Drop the rows without usable metadata and attach the new column in one step.
# PRs with no status-change event get an empty string.
df = df.loc[keep_row].copy()
df['status_changes'] = status_column

# Event types seen across all discussions (displayed as the cell output)
all_events

100%|██████████| 7823/7823 [00:00<00:00, 8086.26it/s]


{'comment', 'commit', 'status-change'}

In [19]:
import requests
# Seconds to wait for a PR page before giving up on the request; without a
# timeout a single stalled connection would hang the whole loop.
REQUEST_TIMEOUT_SECONDS = 10

# For PRs with no recorded status change, fetch the PR page and label the row
# 'CONFLICT' when the page text contains the word "conflict".
# NOTE(review): the substring test is a heuristic -- confirm it does not also
# match unrelated text on the page.
for index, row in tqdm(df.iterrows(), total=len(df)):
    status_changes = row['status_changes']
    if status_changes:
        continue  # this PR already has a recorded status change

    pr_url = row['pr_url']
    try:
        response = requests.get(pr_url, timeout=REQUEST_TIMEOUT_SECONDS)
    except requests.RequestException as error:
        # Report and move on so one unreachable PR does not abort the scan
        print(f"request failed for {pr_url}: {error}")
        continue

    # Non-200 responses and pages without the marker leave the row unchanged
    if response.status_code == 200 and 'conflict' in response.text:
        df.at[index, 'status_changes'] = 'CONFLICT'
        print(pr_url)
        # NOTE(review): the scan stops at the first conflict found, so later
        # rows are never checked. This looks like an exploratory probe --
        # remove the break to label every PR before plotting. TODO confirm.
        break
        

  1%|          | 40/7823 [00:03<12:00, 10.80it/s]

https://huggingface.co/eimiss/EimisAnimeDiffusion_1.0v/discussions/15





In [None]:

import matplotlib.pyplot as plt

# Plot the distribution of status changes, only for PRs that have at least one
# recorded value (empty strings and missing values are excluded).
# DataFrame.value_counts() would count unique whole rows, so the counts are
# taken on the 'status_changes' column instead.
recorded_status_changes = df.loc[
    df['status_changes'].notna() & (df['status_changes'] != ''),
    'status_changes',
]

ax = recorded_status_changes.value_counts().plot(kind='bar')
ax.set(
    title='Distribution of PR status changes',
    xlabel='Status changes (";"-joined, in event order)',
    ylabel='Number of PRs',
)
plt.show()

