## Exploratory Analysis - Tests

### Global Imports

In [2]:
### Imports
import os
import json
import pandas as pd

In [3]:
# Prerequisite (run once per environment): %pip install convokit

### Parameters and Global Variables

In [4]:
# Derive the convokit data directory from the notebook's working directory:
# normalise Windows separators, then swap the statistics folder for convokit.
cwd = os.getcwd()
LOCAL_PATH = cwd.replace("\\", "/").replace("data/statistics", "data/convokit")
LOCAL_PATH

'c:/Users/fdmol/Desktop/MSCAPP/CAPP30254/supreme-court-ml-predictions/supreme_court_predictions/data/convokit'

### Functions

In [5]:
"""
This cell serves as the client for convokit.
"""
from convokit import Corpus, download


def get_data():
    """
    Fetches the Supreme Court Corpus through convokit and dumps it as
    "supreme_corpus" under LOCAL_PATH
    """

    print("Loading Supreme Court Corpus Data...")
    # download() hands back the location that Corpus() is then built from
    corpus_location = download("supreme-corpus")
    supreme_corpus = Corpus(filename=corpus_location)
    supreme_corpus.dump("supreme_corpus", base_path=LOCAL_PATH)


In [6]:
### Begin reading data
def load_data(file_name):
    """
    Opens a JSON file from the dumped corpus and returns its parsed content

    :param file_name: The name of the file to open, relative to the
        "supreme_corpus" folder under LOCAL_PATH
    :return: The parsed JSON content (a dictionary for the corpus files
        loaded in this notebook)
    """
    path = LOCAL_PATH + f"/supreme_corpus/{file_name}"
    # JSON is UTF-8 by specification. Without an explicit encoding, open()
    # falls back to the platform locale (e.g. cp1252 on Windows) and can
    # mis-decode or fail on non-ASCII text.
    with open(path, encoding="utf-8") as file:
        data = json.load(file)
    return data

In [7]:
def speakers_to_df(speakers_dict):
    """
    Builds a pandas dataframe with one row per speaker, using each entry's
    'meta' mapping as the row

    :param speakers_dict: The speakers dictionary
    :return: The speakers dataframe, with 'name', 'type' and 'role' renamed
        to 'speaker_name', 'speaker_type' and 'speaker_role'
    """

    meta_rows = [entry['meta'] for entry in speakers_dict.values()]
    column_names = {
        'name': 'speaker_name',
        'type': 'speaker_type',
        'role': 'speaker_role',
    }
    return pd.DataFrame(meta_rows).rename(columns=column_names)

### Pipeline and Tests

In [8]:
# Fetch the corpus through convokit and dump it under LOCAL_PATH
get_data()

Loading Supreme Court Corpus Data...
Dataset already exists at C:\Users\fdmol\.convokit\downloads\supreme-corpus


In [9]:
# Read the dumped speakers file and flatten its metadata into a dataframe
speakers_dict = load_data("speakers.json")
speakers_df = speakers_to_df(speakers_dict)
# Preview the first rows only
speakers_df.head()

Unnamed: 0,speaker_name,speaker_type,speaker_role
0,Earl Warren,J,justice
1,Harry F. Murphy,A,
2,William O. Douglas,J,justice
3,<INAUDIBLE>,U,inaudible
4,Felix Frankfurter,J,justice


In [10]:
# Conversations metadata: parsed conversations.json from the dumped corpus,
# keyed by conversation id
conversations_dict = load_data("conversations.json")

In [21]:
# Flatten the conversations metadata into two lists of records:
#   dict_list      -> one record per conversation (id, case, winning side)
#   advocates_list -> one record per (conversation, advocate) pair
dict_list = []
# Kept outside the loop so records from every conversation are retained;
# resetting it per conversation would discard everything but the last one.
advocates_list = []
for conversation_id, conversation in conversations_dict.items():
    conversation_data = conversation['meta']

    clean_dict = {}
    clean_dict['id'] = conversation_id
    clean_dict['case_id'] = conversation_data['case_id']
    clean_dict['winning_side'] = conversation_data['win_side']

    advocates = conversation_data['advocates']
    # The votes_side mapping is read here but not folded into any record yet.
    votes = conversation_data['votes_side']

    for advocate in advocates:
        advocate_dict = {}
        # `advocate` is the advocate's string key, so the conversation
        # identifiers go on the record dict (item assignment on a str
        # raises TypeError).
        advocate_dict['id'] = conversation_id
        advocate_dict['case_id'] = conversation_data['case_id']
        advocate_dict['advocate'] = advocate
        advocate_dict['side'] = advocates[advocate]['side']
        advocate_dict['role'] = advocates[advocate]['role']
        advocates_list.append(advocate_dict)

    dict_list.append(clean_dict)

# conversations_df = pd.DataFrame(dict_list)



{'harry_f_murphy': {'side': 1, 'role': 'inferred'}, 'john_v_lindsay': {'side': 0, 'role': 'inferred'}}
{'j__john_m_harlan2': 0, 'j__hugo_l_black': 0, 'j__william_o_douglas': 0, 'j__earl_warren': 0, 'j__tom_c_clark': 0, 'j__felix_frankfurter': 0, 'j__harold_burton': 0, 'j__stanley_reed': 0, 'j__sherman_minton': 0}
{'howard_c_westwood': {'side': 1, 'role': 'inferred'}, 'walter_j_derenberg': {'side': 0, 'role': 'inferred'}}
{'j__john_m_harlan2': 1, 'j__hugo_l_black': 1, 'j__william_o_douglas': 0, 'j__earl_warren': 1, 'j__tom_c_clark': 1, 'j__felix_frankfurter': 1, 'j__harold_burton': 1, 'j__stanley_reed': 0, 'j__sherman_minton': 1}
{'howard_c_westwood': {'side': 1, 'role': 'inferred'}, 'walter_j_derenberg': {'side': 0, 'role': 'inferred'}}
{'j__john_m_harlan2': 1, 'j__hugo_l_black': 1, 'j__william_o_douglas': 0, 'j__earl_warren': 1, 'j__tom_c_clark': 1, 'j__felix_frankfurter': 1, 'j__harold_burton': 1, 'j__stanley_reed': 0, 'j__sherman_minton': 1}
{'harry_d_graham': {'side': 3, 'role': 'i

In [29]:
# Scratch check: rebuild an advocate record by hand. This relies on
# `advocates` still being in scope from the conversations loop in an earlier
# cell, and each pass overwrites advocate_dict, so only the last one survives.
for advocate, details in advocates.items():
    advocate_dict = {
        'advocate': advocate,
        'side': details['side'],
        'role': details['role'],
    }

In [30]:
# Inspect the record built for the last advocate iterated in the previous cell
advocate_dict

{'advocate': 'mark_c_fleming', 'side': 0, 'role': 'for the Respondent'}