Skip to content

Commit

Permalink
Mean Shift
Browse files Browse the repository at this point in the history
  • Loading branch information
madhug-nadig committed Jun 1, 2017
1 parent 4086f51 commit 162f64c
Showing 1 changed file with 60 additions and 2 deletions.
62 changes: 60 additions & 2 deletions Mean Shift.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from matplotlib import style
import pandas
import datetime
from sklearn import preprocessing, cross_validation

# Plotting setup: select matplotlib's 'ggplot' style sheet.
plt.style.use('ggplot')
Expand All @@ -21,12 +22,69 @@ def __init__(self):


def _handle_non_numerical_data(df):
    """Replace every non-numeric column of *df* with integer ids, in place.

    Each distinct value in a non-numeric column is given an integer id in
    order of first appearance (0, 1, 2, ...), and the column is overwritten
    with those ids. Numeric columns are left untouched.

    Args:
        df: pandas DataFrame whose non-numeric columns hold hashable values.

    Returns:
        The same DataFrame object, after modification.
    """
    from pandas.api.types import is_numeric_dtype

    for column in df.columns:
        # Any numeric dtype is already usable as a feature, not only
        # int64/float64 (e.g. int32 columns must not be re-encoded).
        if is_numeric_dtype(df[column]):
            continue

        values = df[column].tolist()
        # dict.fromkeys keeps first-appearance order, so the ids are the same
        # on every run. Iterating a set of strings is not reproducible because
        # string hashing is randomised per process.
        ids = {value: index for index, value in enumerate(dict.fromkeys(values))}
        df[column] = [ids[value] for value in values]

    return df


def main():
    """Prepare the Titanic data and run the custom mean-shift model on toy data.

    Columns described for ``data/titanic.xls``:

    Pclass      Passenger Class (1 = 1st; 2 = 2nd; 3 = 3rd)
    survived    Survival (0 = No; 1 = Yes)
    name        Name
    sex         Sex
    age         Age
    sibsp       Number of Siblings/Spouses Aboard
    parch       Number of Parents/Children Aboard
    ticket      Ticket Number
    fare        Passenger Fare (British pound)
    cabin       Cabin
    embarked    Port of Embarkation (C = Cherbourg; Q = Queenstown; S = Southampton)
    boat        Lifeboat
    body        Body Identification Number
    home.dest   Home/Destination

    NOTE: the Titanic feature matrix ``X``, the labels ``y`` and
    ``original_df`` are prepared here but not consumed yet; the model is
    still fitted on the small hard-coded ``dataset``.
    """
    # Bind the `pd` alias locally so this function does not depend on an
    # alias being defined at module level.
    import pandas as pd

    # Toy data: two labelled groups of 2-D points.
    dataset = {
        -1: np.array([[2, 3], [4, 5], [2, 1]]),
        1: np.array([[5, 6], [8, 8], [9, 9]]),
    }

    df = pd.read_excel('data/titanic.xls')
    original_df = df.copy()  # unmodified copy of the raw sheet

    # Drop the unused columns before encoding so no work is spent on columns
    # that are discarded anyway. `axis` is passed by keyword because recent
    # pandas releases no longer accept it positionally.
    df = df.drop(['body', 'name', 'ticket', 'home.dest'], axis=1)
    df = df.fillna(0)
    df = _handle_non_numerical_data(df)

    # Standardised feature matrix and survival labels.
    X = preprocessing.scale(np.array(df.drop(['survived'], axis=1).astype(float)))
    y = np.array(df['survived'])

    ms = CustomMS()
    ms.fit(dataset=dataset)
    pred = ms.predict(attrs=[2, 2])


# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

0 comments on commit 162f64c

Please sign in to comment.