![image](https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTKbtFaCY2RxamhiK68ufZQW-t5G2KyZ1khVA&usqp=CAU)

# How To Be A Successful Businessman 
1. Face reality, there is no work–life balance!
2. Eagles fly alone.
3. It’s bloody hard, regardless of what happens to you in life.
4. There’s only one way to grow as an individual.
5. Burn your ships.
6. You can accomplish anything with sacrifice.

# Call libraries

In [1]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Read Data 

In [1]:
# Load the data set from the relative input directory into a DataFrame.
csv_path = '../input/entrepreneurial-competency-in-university-students/data.csv'
df = pd.read_csv(csv_path)

In [1]:
# Preview the first 10 rows of the frame.
df.head(10)

In [1]:
# List the column labels.
df.columns

In [1]:
# Print a concise summary of the frame (columns, non-null counts, dtypes).
df.info()

In [1]:
# (number of rows, number of columns).
df.shape

In [1]:
# Descriptive statistics per column (count, mean, std, quartiles, ...).
df.describe()

In [1]:
# Element-wise missing-value mask (True where a cell is null).
# NOTE(review): df.isnull().sum() would give a per-column count, which is
# usually easier to read than the full boolean frame.
df.isnull()

In [1]:
# Remove eight columns from the frame in place.
dropped_columns = [
    'Influenced', 'City', 'MentalDisorder', 'KeyTraits',
    'ReasonsForLack', 'Gender', 'EducationSector', 'IndividualProject',
]
df.drop(columns=dropped_columns, inplace=True)


In [1]:
# Preview the last 10 rows of the frame.
df.tail(10)

# Using visualization 

In [1]:
# Bar chart of how many rows fall into each Age value.
x = df['Age']
plt.figure(figsize=(10, 8))
# Pass the vector by keyword: seaborn >= 0.12 treats the first positional
# argument of countplot as `data`, so a positional Series is no longer
# interpreted as the x variable.
ax = sns.countplot(x=x, color='#00ffff')
ax.set_xlabel(xlabel='Age of Students', fontsize=16)
ax.set_title(label='Distribution of Age of Students', fontsize=20)
plt.show()


In [1]:
# Bar chart of how many rows fall into each DesireToTakeInitiative value.
x = df['DesireToTakeInitiative']
plt.figure(figsize=(10, 8))
# Pass the vector by keyword: seaborn >= 0.12 treats the first positional
# argument of countplot as `data`, so a positional Series is no longer
# interpreted as the x variable.
ax = sns.countplot(x=x, color='#00ffff')
ax.set_xlabel(xlabel='DesireToTakeInitiative', fontsize=16)
ax.set_title(label='DesireToTakeInitiative', fontsize=20)
plt.show()

In [1]:
# Annotated heatmap of the pairwise column correlations.
corr_matrix = df.corr()
sns.heatmap(corr_matrix, annot=True);

In [1]:
# Grid of pairwise relationships between the columns of the frame.
sns.pairplot(df)
plt.show()


In [1]:
# Histogram of the Age column with 30 bins.
sns.set_style("ticks", {"xtick.major.size": 8, "ytick.major.size": 8})
df['Age'].hist(bins=30, color="red", edgecolor='black', alpha=0.65, lw=1.5)
# The plotted column is Age, so the x-axis label must say so.
plt.xlabel('Age')


In [1]:
# Count of rows per Age value.
sns.countplot(x='Age', data=df)

In [1]:
# Count of rows per SelfReliance value.
sns.countplot(x='SelfReliance', data=df)

In [1]:
# Joint kernel-density plot of Competitiveness against Age.
sns.jointplot(data=df, x='Age', y='Competitiveness',
              kind='kde', color="red", alpha=0.65)

In [1]:
# Pairwise relationship grid, with points coloured by Age.
sns.pairplot(data=df, hue='Age', palette='cool_r')

In [1]:
# Render the correlation matrix as a plain matplotlib image.
corr = df.corr()
fig, ax = plt.subplots()
im = ax.imshow(corr.values)

# One tick per column on both axes, labelled with the column name.
tick_positions = np.arange(len(corr.columns))
ax.set_xticks(tick_positions)
ax.set_yticks(tick_positions)
ax.set_xticklabels(corr.columns)
ax.set_yticklabels(corr.columns)

# Tilt the x-axis labels and anchor them on their right edge.
plt.setp(ax.get_xticklabels(), rotation=45, rotation_mode="anchor", ha="right")


In [1]:
# Scatter plot of Perseverance against Age.
age = df['Age']
perseverance = df['Perseverance']
plt.scatter(age, perseverance)
plt.ylabel('Perseverance')
plt.xlabel('Age')
plt.show()


In [1]:
# Per-column histograms, then resize the current figure to 12 x 6 inches.
df.hist(linewidth=1.2, edgecolor='black')
fig = plt.gcf()
fig.set_size_inches((12, 6))
plt.show()


In [1]:
# 2 x 2 grid of violin plots: the distribution of each trait per Age value.
plt.figure(figsize=(15, 10))
traits = ['Perseverance', 'DesireToTakeInitiative', 'Competitiveness', 'SelfReliance']
# One subplot per trait, filled row by row (subplot positions are 1-based).
for position, trait in enumerate(traits, start=1):
    plt.subplot(2, 2, position)
    sns.violinplot(x='Age', y=trait, data=df)
# Render explicitly, as the other plotting cells in this notebook do.
plt.show()


In [1]:
# Annotated heatmap of df's correlation matrix on a 10 x 15 inch figure,
# using the reversed cubehelix colour map.
plt.figure(figsize=(10, 15))
corr_matrix = df.corr()
sns.heatmap(corr_matrix, cmap='cubehelix_r', annot=True)
plt.show()


In [1]:
from sklearn.model_selection import train_test_split

In [1]:
# Split the frame into 70% training rows and 30% test rows.
# A fixed seed makes the split, and every score derived from it, reproducible,
# in line with the seeded splits used by the later model cells.
train, test = train_test_split(df, test_size=0.3, random_state=42)
print(train.shape)
print(test.shape)


In [1]:
# Feature columns fed to the models. Each column is listed once: repeating a
# column adds no information and yields duplicate column labels in the frame.
feature_columns = ['Age', 'Perseverance']
train_X = train[feature_columns]  # training features
train_y = train['y']              # training target
test_X = test[feature_columns]    # test features
test_y = test['y']                # test target


In [1]:
# Inspect the test target values.
test_y

In [1]:
# Inspect the training feature frame.
train_X

In [1]:
# Inspect the training target values.
train_y

In [1]:
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

# Tune k for a k-nearest-neighbours classifier: try k = 1..49 with 3-fold
# cross-validation on the training data.
knn = KNeighborsClassifier()
grid = {'n_neighbors': np.arange(1, 50)}
knn_cv = GridSearchCV(estimator=knn, param_grid=grid, cv=3)
knn_cv.fit(train_X, train_y)

# Print hyperparameter


In [1]:
# Report the selected k and its mean cross-validated score.
best_k = knn_cv.best_params_
best_knn_score = knn_cv.best_score_
print("Tuned hyperparameter k: {}".format(best_k))
print("Best score: {}".format(best_knn_score))


In [1]:
from sklearn.linear_model import LogisticRegression

# Grid search (3-fold cross-validation) over two logistic-regression
# hyperparameters:
#   C       - inverse of the regularisation strength
#   penalty - 'l1' or 'l2'
param_grid = {'C': np.logspace(-3, 3, 7), 'penalty': ['l1', 'l2']}
x_train, x_test, y_train, y_test = train_test_split(train_X, train_y, test_size=0.3, random_state=12)
# The default 'lbfgs' solver rejects penalty='l1', so every l1 candidate in
# the grid would fail to fit. 'liblinear' supports both 'l1' and 'l2'.
logreg = LogisticRegression(solver='liblinear')
logreg_cv = GridSearchCV(logreg, param_grid, cv=3)
logreg_cv.fit(x_train, y_train)

# Print the optimal parameters and best score
print("Tuned hyperparameters : {}".format(logreg_cv.best_params_))
print("Best Accuracy: {}".format(logreg_cv.best_score_))



In [1]:
# Support-vector classifier behind a standardisation step, tuned by grid search.
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

# Scale the features first, then fit the SVM.
steps = [('scalar', StandardScaler()), ('SVM', SVC())]
pipeline = Pipeline(steps=steps)

# Candidate values for the SVM step's C and gamma.
parameters = {'SVM__C': [1, 10, 100],
              'SVM__gamma': [0.1, 0.01]}

# Hold out 20% of the training data to score the tuned model.
x_train, x_test, y_train, y_test = train_test_split(
    train_X, train_y, test_size=0.2, random_state=1)
cv = GridSearchCV(estimator=pipeline, param_grid=parameters, cv=3)
cv.fit(x_train, y_train)

y_pred = cv.predict(x_test)

holdout_accuracy = cv.score(x_test, y_test)
print("Accuracy: {}".format(holdout_accuracy))
print("Tuned Model Parameters: {}".format(cv.best_params_))


In [1]:
# Scatter plot of Perseverance against Age. The axis labels name the columns
# that are actually plotted.
plt.scatter(df['Age'], df['Perseverance'])
plt.xlabel('Age')
plt.ylabel('Perseverance')
plt.show()


In [1]:
# Per-column histograms, then resize the current figure to 12 x 6 inches.
df.hist(linewidth=1.2, edgecolor='black')
fig = plt.gcf()
fig.set_size_inches((12, 6))
plt.show()
