In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from scipy.sparse import hstack, csr_matrix
import numpy as np

class TextRelevanceModel:
    def __init__(self, keyword_categories, negative_keywords=None, model=None):
        self.keyword_categories = keyword_categories
        self.negative_keywords = negative_keywords if negative_keywords is not None else []
        self.vectorizer = TfidfVectorizer()
        self.model = model if model is not None else LogisticRegression(max_iter=1000, random_state=42)
    
    def keyword_count(self, text, keywords):
        return sum(1 for word in text.lower().split() if word in keywords)
    
    def prepare_data(self, df, text_column):
        df[text_column] = df[text_column].str.lower()

        keyword_counts = pd.DataFrame()
        
        for category, keywords in self.keyword_categories.items():
            keyword_counts[category + '_count'] = df[text_column].apply(lambda x: self.keyword_count(x, keywords))
        
        keyword_counts['negative_keyword_count'] = df[text_column].apply(lambda x: self.keyword_count(x, self.negative_keywords))
        
        X_text = self.vectorizer.fit_transform(df[text_column])
        X_keywords = keyword_counts.to_numpy()
        X = hstack([X_text, csr_matrix(X_keywords)])
        
        return X
    
    def train(self, X, y):
        keyword_present = np.any(X[:, -len(self.keyword_categories):].toarray(), axis=1)
        X_train = X[keyword_present]
        y_train = y[keyword_present]
        
        self.model.fit(X_train, y_train)
    
    def predict_proba(self, text):
        X_text_new = self.vectorizer.transform([text])
        keyword_counts_new = np.array([[self.keyword_count(text, keywords) for keywords in self.keyword_categories.values()]])
        negative_keyword_count_new = np.array([[self.keyword_count(text, self.negative_keywords)]])
        X_new = hstack([X_text_new, csr_matrix(keyword_counts_new), csr_matrix(negative_keyword_count_new)])
        
        if not np.any(keyword_counts_new):
            return 0.0
        
        return self.model.predict_proba(X_new)[0, 1]


Probability of relevance: 0.49353904502514173


In [19]:
# Example usage
if __name__ == "__main__":
    # Example DataFrame
    data = {
        'text': [
            'This is the first document.', 'This document is the second document.', 
            'And this is the third one.', 'Is this the first document?',
            'More text data.', 'Even more text.', 'Text data again.', 
            'And another one.', 'More examples.', 'Additional text.',
            'Sample text.', 'Another sample.', 'More samples.', 'Final example.'
        ],
        'relevance': [1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0]  # Relevance labels
    }
    df = pd.DataFrame(data)
    
    # Define categories of keywords
    keyword_categories = {
        'Category1': ['first', 'second', 'third'],
        'Category2': ['more', 'additional', 'sample']
    }
    
    # Define negative keywords
    negative_keywords = ['another', 'final']
    
    # Create an instance of the model
    model = TextRelevanceModel(keyword_categories, negative_keywords)
    
    # Prepare data
    X = model.prepare_data(df, text_column='text')
    y = df['relevance']
    
    # Train the model
    model.train(X, y)
    
    # Predict the probability of relevance for a new sample of text
    text_sample = "This is a new document with some more relevant keywords"
    relevance_probability = model.predict_proba(text_sample)
    print("Probability of relevance:", relevance_probability)


Probability of relevance: 0.49353904502514173


In [20]:
# Example DataFrame with 5 text samples
data = {
    'text': [
        'This is the first document.', 
        'This document is the second document.', 
        'And this is the third one.', 
        'Is this the first document?',
        'More text data.'
    ]
}
df_samples = pd.DataFrame(data)

# Predict the probability of relevance for each text sample
for i, text_sample in enumerate(df_samples['text']):
    relevance_probability = model.predict_proba(text_sample)
    print(f"Probability of relevance for sample {i+1}: {relevance_probability:.2f}")


Probability of relevance for sample 1: 0.36
Probability of relevance for sample 2: 0.36
Probability of relevance for sample 3: 0.34
Probability of relevance for sample 4: 0.36
Probability of relevance for sample 5: 0.63


In [21]:
# Example usage
if __name__ == "__main__":
    # Importing and dropping rows from Frame

    df_coded = pd.read_csv('../data/processed_and_coded_posts.csv')
    df = df_coded[['processed_text','highly_relevant']]
    
    #Importing keywords

    csv_file_path = '../keywords/medications.csv'

    # Read the CSV file
    df_med = pd.read_csv(csv_file_path)

    # Extract the first column as a list of keywords
    medications = df_med.iloc[:, 0].tolist()

    csv_file_path_2 = '../keywords/Treatment.csv'

    # Read the CSV file
    df_therapy = pd.read_csv(csv_file_path_2)

    # Extract the first column as a list of keywords
    therapy = df_therapy.iloc[:, 0].tolist()

    general_keywords = ['medicine','therapy','treatment','recovery','prescribed','diagnosed','med','meds','prescribe','therapist','session','psychiatrist','psychiatrists','dosage','medication', 'dbt', 'abilify', 'outpatient', 'therapist', 'harming','medicine','therapy','treatment','recovery','prescribed','diagnosed','therapists','prescribe','diagnose','medicines','drugs','drug','therapist','session']

    # Define categories of keywords
    
    keyword_categories = {
    'general_keywords': general_keywords,
    'medications': medications,
    'therapy': therapy
}
    
    # Define negative keywords
    negative_keywords = ['relationship', 'friend', 'together', 'fp', 'people', 'person', 'partner', 'dating']
    
    # Create an instance of the model
    model = TextRelevanceModel(keyword_categories, negative_keywords)
    
    #Prepare data
    X = model.prepare_data(df, text_column='processed_text')
    y = df['highly_relevant']

    # Train the model
    model.train(X, y)


    # Predict the probability of relevance for a new sample of text
    text_sample = "This is a new an excerpt on dbt and emdr"
    relevance_probability = model.predict_proba(text_sample)
    print("Probability of relevance:", relevance_probability)

   

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[text_column] = df[text_column].str.lower()


Probability of relevance: 0.09328291951431598


In [11]:
df.columns

Index(['processed_text', 'highly_relevant'], dtype='object')

In [32]:
# Predict the probability of relevance for each text sample
df_coded['relevance_probability']=0.0

for i, text_sample in enumerate(df['processed_text']):
    relevance_probability = model.predict_proba(text_sample)
    df_coded['relevance_probability'][i]=relevance_probability
    print(f"Probability of relevance for sample {i+1}: {relevance_probability:.2f}")

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_coded['relevance_probability'][i]=relevance_probability
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_c

Probability of relevance for sample 1: 0.00
Probability of relevance for sample 2: 0.00
Probability of relevance for sample 3: 0.00
Probability of relevance for sample 4: 0.04
Probability of relevance for sample 5: 0.00
Probability of relevance for sample 6: 0.06
Probability of relevance for sample 7: 0.00
Probability of relevance for sample 8: 0.00
Probability of relevance for sample 9: 0.00
Probability of relevance for sample 10: 0.19
Probability of relevance for sample 11: 0.09
Probability of relevance for sample 12: 0.02
Probability of relevance for sample 13: 0.00
Probability of relevance for sample 14: 0.00
Probability of relevance for sample 15: 0.05
Probability of relevance for sample 16: 0.01
Probability of relevance for sample 17: 0.00
Probability of relevance for sample 18: 0.09
Probability of relevance for sample 19: 0.00
Probability of relevance for sample 20: 0.00
Probability of relevance for sample 21: 0.00
Probability of relevance for sample 22: 0.00
Probability of rele

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_coded['relevance_probability'][i]=relevance_probability
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_c

Probability of relevance for sample 28: 0.00
Probability of relevance for sample 29: 0.01
Probability of relevance for sample 30: 0.00
Probability of relevance for sample 31: 0.04
Probability of relevance for sample 32: 0.01
Probability of relevance for sample 33: 0.29
Probability of relevance for sample 34: 0.02
Probability of relevance for sample 35: 0.11
Probability of relevance for sample 36: 0.00
Probability of relevance for sample 37: 0.01
Probability of relevance for sample 38: 0.00
Probability of relevance for sample 39: 0.00
Probability of relevance for sample 40: 0.00
Probability of relevance for sample 41: 0.00
Probability of relevance for sample 42: 0.00
Probability of relevance for sample 43: 0.00
Probability of relevance for sample 44: 0.00
Probability of relevance for sample 45: 0.00
Probability of relevance for sample 46: 0.00
Probability of relevance for sample 47: 0.05
Probability of relevance for sample 48: 0.08
Probability of relevance for sample 49: 0.00
Probabilit

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_coded['relevance_probability'][i]=relevance_probability
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_c

Probability of relevance for sample 57: 0.00
Probability of relevance for sample 58: 0.00
Probability of relevance for sample 59: 0.00
Probability of relevance for sample 60: 0.00
Probability of relevance for sample 61: 0.00
Probability of relevance for sample 62: 0.00
Probability of relevance for sample 63: 0.00
Probability of relevance for sample 64: 0.00
Probability of relevance for sample 65: 0.00
Probability of relevance for sample 66: 0.00
Probability of relevance for sample 67: 1.00
Probability of relevance for sample 68: 0.00
Probability of relevance for sample 69: 0.00
Probability of relevance for sample 70: 0.01
Probability of relevance for sample 71: 0.00
Probability of relevance for sample 72: 0.00
Probability of relevance for sample 73: 0.00
Probability of relevance for sample 74: 0.00
Probability of relevance for sample 75: 0.00
Probability of relevance for sample 76: 0.03
Probability of relevance for sample 77: 0.00
Probability of relevance for sample 78: 0.00
Probabilit

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_coded['relevance_probability'][i]=relevance_probability
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_c

Probability of relevance for sample 87: 0.00
Probability of relevance for sample 88: 0.12
Probability of relevance for sample 89: 0.00
Probability of relevance for sample 90: 0.00
Probability of relevance for sample 91: 0.43
Probability of relevance for sample 92: 0.00
Probability of relevance for sample 93: 0.00
Probability of relevance for sample 94: 0.00
Probability of relevance for sample 95: 0.00
Probability of relevance for sample 96: 0.00
Probability of relevance for sample 97: 0.00
Probability of relevance for sample 98: 0.00
Probability of relevance for sample 99: 0.00
Probability of relevance for sample 100: 0.00
Probability of relevance for sample 101: 0.00
Probability of relevance for sample 102: 0.00
Probability of relevance for sample 103: 0.00
Probability of relevance for sample 104: 0.00
Probability of relevance for sample 105: 0.00
Probability of relevance for sample 106: 0.00


You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_coded['relevance_probability'][i]=relevance_probability
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_c

Probability of relevance for sample 107: 0.23
Probability of relevance for sample 108: 0.00
Probability of relevance for sample 109: 0.00
Probability of relevance for sample 110: 0.00
Probability of relevance for sample 111: 0.02
Probability of relevance for sample 112: 0.00
Probability of relevance for sample 113: 0.31
Probability of relevance for sample 114: 0.00
Probability of relevance for sample 115: 0.44
Probability of relevance for sample 116: 0.00
Probability of relevance for sample 117: 0.00
Probability of relevance for sample 118: 0.00
Probability of relevance for sample 119: 0.02


You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_coded['relevance_probability'][i]=relevance_probability
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_c

Probability of relevance for sample 120: 0.00
Probability of relevance for sample 121: 0.04
Probability of relevance for sample 122: 0.06
Probability of relevance for sample 123: 0.10
Probability of relevance for sample 124: 0.09
Probability of relevance for sample 125: 0.00
Probability of relevance for sample 126: 0.00
Probability of relevance for sample 127: 0.74
Probability of relevance for sample 128: 0.00
Probability of relevance for sample 129: 0.00
Probability of relevance for sample 130: 0.02
Probability of relevance for sample 131: 0.00
Probability of relevance for sample 132: 0.00
Probability of relevance for sample 133: 0.00
Probability of relevance for sample 134: 0.02


You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_coded['relevance_probability'][i]=relevance_probability
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_c

Probability of relevance for sample 135: 0.04
Probability of relevance for sample 136: 0.16
Probability of relevance for sample 137: 0.04
Probability of relevance for sample 138: 0.00
Probability of relevance for sample 139: 0.00
Probability of relevance for sample 140: 0.00
Probability of relevance for sample 141: 0.00
Probability of relevance for sample 142: 0.94
Probability of relevance for sample 143: 0.00
Probability of relevance for sample 144: 0.00
Probability of relevance for sample 145: 0.00
Probability of relevance for sample 146: 0.93
Probability of relevance for sample 147: 0.08


You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_coded['relevance_probability'][i]=relevance_probability
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_c

Probability of relevance for sample 148: 0.00
Probability of relevance for sample 149: 0.02
Probability of relevance for sample 150: 0.00
Probability of relevance for sample 151: 0.02
Probability of relevance for sample 152: 0.00
Probability of relevance for sample 153: 0.04
Probability of relevance for sample 154: 0.00
Probability of relevance for sample 155: 0.30
Probability of relevance for sample 156: 0.00
Probability of relevance for sample 157: 0.01
Probability of relevance for sample 158: 0.00
Probability of relevance for sample 159: 0.00


You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_coded['relevance_probability'][i]=relevance_probability
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_c

Probability of relevance for sample 160: 0.12
Probability of relevance for sample 161: 0.05
Probability of relevance for sample 162: 0.00
Probability of relevance for sample 163: 0.00
Probability of relevance for sample 164: 0.00
Probability of relevance for sample 165: 0.06
Probability of relevance for sample 166: 0.02
Probability of relevance for sample 167: 0.00
Probability of relevance for sample 168: 0.00
Probability of relevance for sample 169: 0.00
Probability of relevance for sample 170: 0.02
Probability of relevance for sample 171: 0.02
Probability of relevance for sample 172: 0.00
Probability of relevance for sample 173: 0.42
Probability of relevance for sample 174: 0.03
Probability of relevance for sample 175: 0.00
Probability of relevance for sample 176: 0.00
Probability of relevance for sample 177: 0.05
Probability of relevance for sample 178: 0.00
Probability of relevance for sample 179: 0.00
Probability of relevance for sample 180: 0.00
Probability of relevance for sampl

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_coded['relevance_probability'][i]=relevance_probability
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_c

Probability of relevance for sample 186: 0.00
Probability of relevance for sample 187: 0.06
Probability of relevance for sample 188: 0.00
Probability of relevance for sample 189: 0.00
Probability of relevance for sample 190: 0.02
Probability of relevance for sample 191: 0.00
Probability of relevance for sample 192: 0.05
Probability of relevance for sample 193: 0.02
Probability of relevance for sample 194: 0.02
Probability of relevance for sample 195: 0.00
Probability of relevance for sample 196: 0.00
Probability of relevance for sample 197: 0.39
Probability of relevance for sample 198: 0.16
Probability of relevance for sample 199: 0.00
Probability of relevance for sample 200: 0.00
Probability of relevance for sample 201: 0.00
Probability of relevance for sample 202: 0.01
Probability of relevance for sample 203: 0.16


You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_coded['relevance_probability'][i]=relevance_probability
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_c

Probability of relevance for sample 204: 0.00
Probability of relevance for sample 205: 0.01
Probability of relevance for sample 206: 0.95
Probability of relevance for sample 207: 0.00
Probability of relevance for sample 208: 0.00
Probability of relevance for sample 209: 0.00
Probability of relevance for sample 210: 0.00
Probability of relevance for sample 211: 0.02
Probability of relevance for sample 212: 0.00
Probability of relevance for sample 213: 0.00
Probability of relevance for sample 214: 0.02
Probability of relevance for sample 215: 0.02
Probability of relevance for sample 216: 0.00
Probability of relevance for sample 217: 0.00
Probability of relevance for sample 218: 0.02
Probability of relevance for sample 219: 0.00
Probability of relevance for sample 220: 0.00
Probability of relevance for sample 221: 0.00
Probability of relevance for sample 222: 0.00
Probability of relevance for sample 223: 0.48


You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_coded['relevance_probability'][i]=relevance_probability
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_c

Probability of relevance for sample 224: 0.00
Probability of relevance for sample 225: 0.15
Probability of relevance for sample 226: 0.00
Probability of relevance for sample 227: 0.00
Probability of relevance for sample 228: 0.00
Probability of relevance for sample 229: 0.00
Probability of relevance for sample 230: 0.00
Probability of relevance for sample 231: 0.00
Probability of relevance for sample 232: 0.00
Probability of relevance for sample 233: 0.00
Probability of relevance for sample 234: 0.06
Probability of relevance for sample 235: 0.00
Probability of relevance for sample 236: 0.19
Probability of relevance for sample 237: 0.00
Probability of relevance for sample 238: 0.00
Probability of relevance for sample 239: 0.01
Probability of relevance for sample 240: 0.04
Probability of relevance for sample 241: 0.00
Probability of relevance for sample 242: 0.00
Probability of relevance for sample 243: 0.00
Probability of relevance for sample 244: 0.00
Probability of relevance for sampl

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_coded['relevance_probability'][i]=relevance_probability
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_c

Probability of relevance for sample 248: 0.00
Probability of relevance for sample 249: 0.00
Probability of relevance for sample 250: 0.03
Probability of relevance for sample 251: 0.00
Probability of relevance for sample 252: 0.00
Probability of relevance for sample 253: 0.00
Probability of relevance for sample 254: 0.00
Probability of relevance for sample 255: 0.02
Probability of relevance for sample 256: 0.00
Probability of relevance for sample 257: 0.00
Probability of relevance for sample 258: 0.00
Probability of relevance for sample 259: 0.00
Probability of relevance for sample 260: 0.00
Probability of relevance for sample 261: 0.00
Probability of relevance for sample 262: 0.00
Probability of relevance for sample 263: 0.00
Probability of relevance for sample 264: 0.00
Probability of relevance for sample 265: 0.02
Probability of relevance for sample 266: 0.00
Probability of relevance for sample 267: 0.00
Probability of relevance for sample 268: 0.00
Probability of relevance for sampl

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_coded['relevance_probability'][i]=relevance_probability
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_c

Probability of relevance for sample 274: 0.00
Probability of relevance for sample 275: 0.00
Probability of relevance for sample 276: 0.00
Probability of relevance for sample 277: 0.01
Probability of relevance for sample 278: 0.01
Probability of relevance for sample 279: 0.00
Probability of relevance for sample 280: 0.00
Probability of relevance for sample 281: 0.03
Probability of relevance for sample 282: 0.00
Probability of relevance for sample 283: 0.00
Probability of relevance for sample 284: 0.04
Probability of relevance for sample 285: 0.00
Probability of relevance for sample 286: 0.00
Probability of relevance for sample 287: 0.43
Probability of relevance for sample 288: 0.02
Probability of relevance for sample 289: 0.00
Probability of relevance for sample 290: 0.00
Probability of relevance for sample 291: 0.02
Probability of relevance for sample 292: 0.04
Probability of relevance for sample 293: 0.00
Probability of relevance for sample 294: 0.00


You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_coded['relevance_probability'][i]=relevance_probability
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_c

Probability of relevance for sample 295: 0.05
Probability of relevance for sample 296: 0.00
Probability of relevance for sample 297: 0.00
Probability of relevance for sample 298: 0.03
Probability of relevance for sample 299: 0.00
Probability of relevance for sample 300: 0.00
Probability of relevance for sample 301: 0.00
Probability of relevance for sample 302: 0.00
Probability of relevance for sample 303: 0.00
Probability of relevance for sample 304: 0.00
Probability of relevance for sample 305: 0.00
Probability of relevance for sample 306: 0.04
Probability of relevance for sample 307: 0.00
Probability of relevance for sample 308: 0.00
Probability of relevance for sample 309: 0.00
Probability of relevance for sample 310: 0.00
Probability of relevance for sample 311: 0.00
Probability of relevance for sample 312: 0.00
Probability of relevance for sample 313: 0.04
Probability of relevance for sample 314: 0.05
Probability of relevance for sample 315: 0.00
Probability of relevance for sampl

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_coded['relevance_probability'][i]=relevance_probability
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_c

Probability of relevance for sample 320: 0.00
Probability of relevance for sample 321: 0.00
Probability of relevance for sample 322: 0.00
Probability of relevance for sample 323: 0.00
Probability of relevance for sample 324: 0.04
Probability of relevance for sample 325: 0.00
Probability of relevance for sample 326: 0.00
Probability of relevance for sample 327: 0.00
Probability of relevance for sample 328: 0.00
Probability of relevance for sample 329: 0.00
Probability of relevance for sample 330: 0.00
Probability of relevance for sample 331: 0.00
Probability of relevance for sample 332: 0.00
Probability of relevance for sample 333: 0.02
Probability of relevance for sample 334: 0.00
Probability of relevance for sample 335: 0.02
Probability of relevance for sample 336: 0.00
Probability of relevance for sample 337: 0.00
Probability of relevance for sample 338: 0.00
Probability of relevance for sample 339: 0.01


You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_coded['relevance_probability'][i]=relevance_probability
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_c

Probability of relevance for sample 340: 0.00
Probability of relevance for sample 341: 0.00
Probability of relevance for sample 342: 0.00
Probability of relevance for sample 343: 0.00
Probability of relevance for sample 344: 0.00
Probability of relevance for sample 345: 0.00
Probability of relevance for sample 346: 0.00
Probability of relevance for sample 347: 0.00
Probability of relevance for sample 348: 0.42
Probability of relevance for sample 349: 0.06
Probability of relevance for sample 350: 0.00
Probability of relevance for sample 351: 0.21
Probability of relevance for sample 352: 0.00
Probability of relevance for sample 353: 0.20
Probability of relevance for sample 354: 0.00


You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_coded['relevance_probability'][i]=relevance_probability
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_c

Probability of relevance for sample 355: 0.00
Probability of relevance for sample 356: 0.00
Probability of relevance for sample 357: 0.00
Probability of relevance for sample 358: 0.00
Probability of relevance for sample 359: 0.00
Probability of relevance for sample 360: 0.00
Probability of relevance for sample 361: 0.00
Probability of relevance for sample 362: 0.06
Probability of relevance for sample 363: 0.00
Probability of relevance for sample 364: 0.00
Probability of relevance for sample 365: 0.00
Probability of relevance for sample 366: 0.00
Probability of relevance for sample 367: 0.02
Probability of relevance for sample 368: 0.53
Probability of relevance for sample 369: 0.00
Probability of relevance for sample 370: 0.00
Probability of relevance for sample 371: 0.00
Probability of relevance for sample 372: 0.00
Probability of relevance for sample 373: 0.00
Probability of relevance for sample 374: 0.00


You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_coded['relevance_probability'][i]=relevance_probability
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_c

Probability of relevance for sample 375: 0.00
Probability of relevance for sample 376: 0.07
Probability of relevance for sample 377: 0.01
Probability of relevance for sample 378: 0.00
Probability of relevance for sample 379: 0.00
Probability of relevance for sample 380: 0.00
Probability of relevance for sample 381: 0.00
Probability of relevance for sample 382: 0.01
Probability of relevance for sample 383: 0.00
Probability of relevance for sample 384: 0.01
Probability of relevance for sample 385: 0.00
Probability of relevance for sample 386: 0.00
Probability of relevance for sample 387: 0.00
Probability of relevance for sample 388: 0.00
Probability of relevance for sample 389: 0.00


You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_coded['relevance_probability'][i]=relevance_probability
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_c

Probability of relevance for sample 390: 0.01
Probability of relevance for sample 391: 0.03
Probability of relevance for sample 392: 0.02
Probability of relevance for sample 393: 0.00
Probability of relevance for sample 394: 0.12
Probability of relevance for sample 395: 0.00
Probability of relevance for sample 396: 0.00
Probability of relevance for sample 397: 0.00
Probability of relevance for sample 398: 0.00
Probability of relevance for sample 399: 0.00
Probability of relevance for sample 400: 0.00
Probability of relevance for sample 401: 0.00
Probability of relevance for sample 402: 0.01
Probability of relevance for sample 403: 0.01
Probability of relevance for sample 404: 0.01
Probability of relevance for sample 405: 0.00
Probability of relevance for sample 406: 0.00
Probability of relevance for sample 407: 0.00
Probability of relevance for sample 408: 0.00
Probability of relevance for sample 409: 0.20
Probability of relevance for sample 410: 0.00
Probability of relevance for sampl

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_coded['relevance_probability'][i]=relevance_probability
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_c

Probability of relevance for sample 413: 0.00
Probability of relevance for sample 414: 0.00
Probability of relevance for sample 415: 0.56
Probability of relevance for sample 416: 0.02
Probability of relevance for sample 417: 0.00
Probability of relevance for sample 418: 0.00
Probability of relevance for sample 419: 0.00
Probability of relevance for sample 420: 0.00
Probability of relevance for sample 421: 0.00
Probability of relevance for sample 422: 0.00
Probability of relevance for sample 423: 0.00
Probability of relevance for sample 424: 0.00
Probability of relevance for sample 425: 0.01
Probability of relevance for sample 426: 0.00
Probability of relevance for sample 427: 0.00


You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_coded['relevance_probability'][i]=relevance_probability
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_c

Probability of relevance for sample 428: 0.00
Probability of relevance for sample 429: 0.00
Probability of relevance for sample 430: 0.00
Probability of relevance for sample 431: 0.00
Probability of relevance for sample 432: 0.00
Probability of relevance for sample 433: 0.00
Probability of relevance for sample 434: 0.01
Probability of relevance for sample 435: 0.00
Probability of relevance for sample 436: 0.00
Probability of relevance for sample 437: 0.00
Probability of relevance for sample 438: 0.00
Probability of relevance for sample 439: 0.00
Probability of relevance for sample 440: 0.00
Probability of relevance for sample 441: 0.00
Probability of relevance for sample 442: 0.43
Probability of relevance for sample 443: 0.00
Probability of relevance for sample 444: 0.29
Probability of relevance for sample 445: 0.00
Probability of relevance for sample 446: 0.00
Probability of relevance for sample 447: 0.00
Probability of relevance for sample 448: 0.00
Probability of relevance for sampl

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_coded['relevance_probability'][i]=relevance_probability
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_c

Probability of relevance for sample 451: 0.00
Probability of relevance for sample 452: 0.00
Probability of relevance for sample 453: 0.04
Probability of relevance for sample 454: 0.00
Probability of relevance for sample 455: 0.02
Probability of relevance for sample 456: 0.00
Probability of relevance for sample 457: 0.00
Probability of relevance for sample 458: 0.04
Probability of relevance for sample 459: 0.00
Probability of relevance for sample 460: 0.00
Probability of relevance for sample 461: 0.00
Probability of relevance for sample 462: 0.00
Probability of relevance for sample 463: 0.00
Probability of relevance for sample 464: 0.01
Probability of relevance for sample 465: 0.00
Probability of relevance for sample 466: 0.00
Probability of relevance for sample 467: 0.00
Probability of relevance for sample 468: 0.24
Probability of relevance for sample 469: 0.07
Probability of relevance for sample 470: 0.00
Probability of relevance for sample 471: 0.00
Probability of relevance for sampl

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_coded['relevance_probability'][i]=relevance_probability
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_c

Probability of relevance for sample 482: 0.06
Probability of relevance for sample 483: 0.05
Probability of relevance for sample 484: 0.00
Probability of relevance for sample 485: 0.09
Probability of relevance for sample 486: 0.00
Probability of relevance for sample 487: 0.00
Probability of relevance for sample 488: 0.00
Probability of relevance for sample 489: 0.00
Probability of relevance for sample 490: 0.44
Probability of relevance for sample 491: 0.04
Probability of relevance for sample 492: 0.00
Probability of relevance for sample 493: 0.00
Probability of relevance for sample 494: 0.00
Probability of relevance for sample 495: 0.04
Probability of relevance for sample 496: 0.00
Probability of relevance for sample 497: 0.00
Probability of relevance for sample 498: 0.03
Probability of relevance for sample 499: 0.00
Probability of relevance for sample 500: 0.02
Probability of relevance for sample 501: 0.11


You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_coded['relevance_probability'][i]=relevance_probability
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_c

Probability of relevance for sample 502: 0.01
Probability of relevance for sample 503: 0.00
Probability of relevance for sample 504: 0.00
Probability of relevance for sample 505: 0.00
Probability of relevance for sample 506: 0.00
Probability of relevance for sample 507: 0.00
Probability of relevance for sample 508: 0.00
Probability of relevance for sample 509: 0.00
Probability of relevance for sample 510: 0.00
Probability of relevance for sample 511: 0.00
Probability of relevance for sample 512: 0.00
Probability of relevance for sample 513: 0.00
Probability of relevance for sample 514: 0.00
Probability of relevance for sample 515: 0.01
Probability of relevance for sample 516: 0.00
Probability of relevance for sample 517: 0.00


You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_coded['relevance_probability'][i]=relevance_probability
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_c

Probability of relevance for sample 518: 0.01
Probability of relevance for sample 519: 0.00
Probability of relevance for sample 520: 0.00
Probability of relevance for sample 521: 0.10
Probability of relevance for sample 522: 0.05
Probability of relevance for sample 523: 0.00
Probability of relevance for sample 524: 0.00
Probability of relevance for sample 525: 0.31
Probability of relevance for sample 526: 0.00
Probability of relevance for sample 527: 0.00
Probability of relevance for sample 528: 0.00
Probability of relevance for sample 529: 0.00
Probability of relevance for sample 530: 0.00
Probability of relevance for sample 531: 0.00
Probability of relevance for sample 532: 0.03
Probability of relevance for sample 533: 0.00
Probability of relevance for sample 534: 0.00
Probability of relevance for sample 535: 0.00
Probability of relevance for sample 536: 0.00
Probability of relevance for sample 537: 0.00
Probability of relevance for sample 538: 0.03
Probability of relevance for sampl

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_coded['relevance_probability'][i]=relevance_probability
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_c

Probability of relevance for sample 540: 0.05
Probability of relevance for sample 541: 0.00
Probability of relevance for sample 542: 0.00
Probability of relevance for sample 543: 0.00
Probability of relevance for sample 544: 0.00
Probability of relevance for sample 545: 0.00
Probability of relevance for sample 546: 0.00
Probability of relevance for sample 547: 0.00
Probability of relevance for sample 548: 0.44
Probability of relevance for sample 549: 0.00
Probability of relevance for sample 550: 0.00
Probability of relevance for sample 551: 0.00
Probability of relevance for sample 552: 0.00
Probability of relevance for sample 553: 0.02
Probability of relevance for sample 554: 0.00
Probability of relevance for sample 555: 0.00
Probability of relevance for sample 556: 0.00
Probability of relevance for sample 557: 0.00
Probability of relevance for sample 558: 0.00
Probability of relevance for sample 559: 0.01
Probability of relevance for sample 560: 0.00
Probability of relevance for sampl

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_coded['relevance_probability'][i]=relevance_probability
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_c

Probability of relevance for sample 572: 0.00
Probability of relevance for sample 573: 0.05
Probability of relevance for sample 574: 0.98
Probability of relevance for sample 575: 0.00
Probability of relevance for sample 576: 0.00
Probability of relevance for sample 577: 0.16
Probability of relevance for sample 578: 0.00
Probability of relevance for sample 579: 0.00
Probability of relevance for sample 580: 0.00
Probability of relevance for sample 581: 0.00
Probability of relevance for sample 582: 0.00
Probability of relevance for sample 583: 0.12
Probability of relevance for sample 584: 0.00
Probability of relevance for sample 585: 0.14
Probability of relevance for sample 586: 0.00
Probability of relevance for sample 587: 0.00
Probability of relevance for sample 588: 0.00
Probability of relevance for sample 589: 0.00
Probability of relevance for sample 590: 0.00
Probability of relevance for sample 591: 0.00
Probability of relevance for sample 592: 0.31
Probability of relevance for sampl

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_coded['relevance_probability'][i]=relevance_probability
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_c

In [33]:
df_coded.sample(50)

Unnamed: 0,created_utc,subreddit,date_created,self,is_relevant,highly_relevant,processed_text,title,relevance_probability
291,1642636836,BPD,2022-01-20 00:00:36,1,0,0,want everything okay im always overanalyzing e...,I just want everything to be okay,0.04209978
427,1642828433,BPD,2022-01-22 05:13:53,1,0,0,ever life happiness stability biggest fear sta...,how can i ever have a life of happiness and st...,0.0
378,1642521632,BPD,2022-01-18 16:00:32,1,0,0,update post need perspective dont know explain...,"UPDATE ON POST ""NEED PERSPECTIVE""",0.0
467,1642780017,BPD,2022-01-21 15:46:57,1,1,0,i’ve rough couple day something someone said m...,i’ve been having a rough couple of days but so...,0.2367687
547,1641500935,BPD,2022-01-06 20:28:55,1,1,0,got diagnosed bpd today sure year super valida...,i got diagnosed with BPD today,0.4409053
125,1643654434,BPD,2022-01-31 18:40:34,1,1,0,20f shame overreacting need advice recently di...,[20F] shame for overreacting. i need advices (...,0.001892253
228,1642337843,BPD,2022-01-16 12:57:23,1,0,0,make fp connection healthier fp best friend go...,How can I make my FP connection healthier?,0.0
256,1642293200,BPD,2022-01-16 00:33:20,1,0,0,anger feel good dont know sometimes fantasize ...,Anger feels so good to me. I don't know why,0.0
485,1641579351,BPD,2022-01-07 18:15:51,1,0,0,partner lied bpd become codependent partner li...,Partner lied to me,0.0
594,1643580635,BPD,2022-01-30 22:10:35,1,0,0,vent nobody want talk feel isolated scared goi...,Vent.,0.0
