In [16]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder

In [17]:
# Data has no none entry so it is not necessary to drop any rows
# Transform categorical string columns to numerical values and keep a dictionary to map them back
def transform_strings_to_numerical(data):
    """
    This function transforms all string values in the dataframe to numerical values using the LabelEncoder from sklearn.
    Args:
        data : Our dataframe which we want to modify

    Returns:
        data : Our modified dataframe
        transform_data : A dictionary containing the mapping from the original string values to the numerical values
    """

    transform_data = {}
    for column in data.columns:
        # If data type is an object, for example a string, we want to convert the column to numerical values
        if data[column].dtype == 'object':
            le = LabelEncoder()
            data[column] = le.fit_transform(data[column])
            # Save the mapping in a dictionary
            transform_data[column] = dict(zip(le.classes_, le.transform(le.classes_)))
    return data, transform_data

In [18]:
def transform_numerical_to_string(data,transform_data):
    """
    This function transforms all numerical values in the dataframe back to the original string values using the LabelEncoder from sklearn.
    Args:
        data : Our dataframe which we want to modify
        transform_data : A dictionary containing the mapping from the original string values to the numerical values

    Returns:
        data : Our modified dataframe
    """
    for column, mapping in transform_data.items():
        #print(f"Mapping for column {column}: {mapping}")
        #if one of the mapped columns is not in the data anymore, we skip it
        if column not in data.columns:
            continue
        # need to reverse the mapping to map back to the original string values
        # this simply swaps the keys and values in the dictionary
        reverse_mapping = {v: k for k, v in mapping.items()}
        # now we can convert back to the original string values
        data[column] = data[column].map(reverse_mapping)
    return data

In [19]:
def get_data():
    """
    Returns the data from the csv file and transforms the categorical values to numerical values
    """
    # read in the data from the csv file
    data = pd.read_csv('data/kickstarter_projects.csv')
    # transform the categorical values to numerical values
    data, transform_data = transform_strings_to_numerical(data)
    """
    If we want to universally modify the data in any other way, we can do it here
    """
    #return the data and the transformation_data in case we want to transform the data back
    return data, transform_data

In [20]:
def get_original_data():
    """
    Returns the original data without any modifications
    """
    # read in the data from the csv file
    data = pd.read_csv('data/kickstarter_projects.csv')
    """
    If we want to universally modify the data in any other way, we can do it here
    """
    #return the data
    return data

In [21]:
#tests:
data, transform_data = get_data()
data.head()


Unnamed: 0,ID,Name,Category,Subcategory,Country,Launched,Deadline,Goal,Pledged,Backers,State
0,1860890148,130454,5,52,21,0,6,1000,625,30,1
1,709707365,63196,6,129,21,1,34,80000,22,3,1
2,1703704063,365635,0,70,21,2,0,20,35,3,3
3,727286,217100,13,131,21,3,31,99,145,25,3
4,1622952265,225555,5,52,21,4,4,1900,387,10,1


In [22]:
data = transform_numerical_to_string(data.head(),transform_data)
data.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column] = data[column].map(reverse_mapping)


Unnamed: 0,ID,Name,Category,Subcategory,Country,Launched,Deadline,Goal,Pledged,Backers,State
0,1860890148,Grace Jones Does Not Give A F$#% T-Shirt (limi...,Fashion,Fashion,United States,2009-04-21 21:02:48,2009-05-31,1000,625,30,Failed
1,709707365,CRYSTAL ANTLERS UNTITLED MOVIE,Film & Video,Shorts,United States,2009-04-23 00:07:53,2009-07-20,80000,22,3,Failed
2,1703704063,drawing for dollars,Art,Illustration,United States,2009-04-24 21:52:03,2009-05-03,20,35,3,Successful
3,727286,Offline Wikipedia iPhone app,Technology,Software,United States,2009-04-25 17:36:21,2009-07-14,99,145,25,Successful
4,1622952265,Pantshirts,Fashion,Fashion,United States,2009-04-27 14:10:39,2009-05-26,1900,387,10,Failed
