6 changes: 2 additions & 4 deletions friendster_network.py
@@ -190,12 +190,10 @@ def tenure_bucket(tenure):


 def predict_paid_or_unpaid(years_experience):
-    if years_experience < 3.0:
+    if years_experience < 3.0 or years_experience >= 8.5:
         return "paid"
-    elif years_experience < 8.5:
-        return "unpaid"
-    else:
-        return "paid"
+    return "unpaid"


 #######################

Comment on lines -193 to -198: Function predict_paid_or_unpaid refactored with the following changes:
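The collapsed conditional is behavior-preserving. A quick standalone check (old_version/new_version are illustrative names, not part of the PR), covering the boundary values:

    def old_version(years_experience):
        if years_experience < 3.0:
            return "paid"
        elif years_experience < 8.5:
            return "unpaid"
        else:
            return "paid"

    def new_version(years_experience):
        if years_experience < 3.0 or years_experience >= 8.5:
            return "paid"
        return "unpaid"

    # both versions agree everywhere, including the edges 3.0 and 8.5
    for x in [0.0, 2.9, 3.0, 5.0, 8.4, 8.5, 12.0]:
        assert old_version(x) == new_version(x)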
4 changes: 2 additions & 2 deletions helpers/gradient_descent.py
@@ -166,7 +166,7 @@ def maximize_stochastic(target_fn, gradient_fn, x, y, theta_0, alpha_0=0.01):

 print("using the gradient")

-v = [random.randint(-10, 10) for i in range(3)]
+v = [random.randint(-10, 10) for _ in range(3)]

 tolerance = 0.0000001


Lines 169-186 refactored with the following changes:

@@ -183,7 +183,7 @@ def maximize_stochastic(target_fn, gradient_fn, x, y, theta_0, alpha_0=0.01):
 print()
 print("using minimize_batch")

-v = [random.randint(-10, 10) for i in range(3)]
+v = [random.randint(-10, 10) for _ in range(3)]

 v = minimize_batch(sum_of_squares, sum_of_squares_gradient, v)

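Here and in several files below, unused loop variables are renamed to _, the conventional Python placeholder for a value that is deliberately ignored; the rename also quiets linter warnings about unused names.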
4 changes: 2 additions & 2 deletions helpers/probabilty.py
@@ -121,8 +121,8 @@ def make_hist(p, n, num_points):
     older = random_kid()
     if older == "girl":
         older_girl += 1
-    if older == "girl" and younger == "girl":
-        both_girls += 1
+        if younger == "girl":
+            both_girls += 1
     if older == "girl" or younger == "girl":
         either_girl += 1

Comment on lines -124 to +125: Lines 124-125 refactored with the following changes:
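Nesting the younger-child test inside the older == "girl" branch is equivalent to the original and-condition, and it skips the second comparison when the first already failed. A standalone sanity check over all four cases:

    for older in ("girl", "boy"):
        for younger in ("girl", "boy"):
            original = older == "girl" and younger == "girl"
            refactored = False
            if older == "girl":
                if younger == "girl":
                    refactored = True
            assert original == refactored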
9 changes: 4 additions & 5 deletions helpers/stats.py
@@ -50,11 +50,10 @@ def median(v):
     if n % 2 == 1:
         # if odd, return the middle value
         return sorted_v[midpoint]
-    else:
-        # if even, return the average of the middle values
-        lo = midpoint - 1
-        hi = midpoint
-        return (sorted_v[lo] + sorted_v[hi]) / 2
+    # if even, return the average of the middle values
+    lo = midpoint - 1
+    hi = midpoint
+    return (sorted_v[lo] + sorted_v[hi]) / 2


 def quantile(x, p):

Comment on lines -53 to +56: Function median refactored with the following changes:
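Dropping the else after an early return changes nothing in behavior. For reference, usage of median as defined in this file:

    assert median([1, 9, 2]) == 2          # odd length: the middle value
    assert median([1, 9, 2, 10]) == 5.5    # even length: average of the two middle values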
3 changes: 1 addition & 2 deletions hparams_grid_search_keras_nn.py
@@ -13,8 +13,7 @@


 def load_data(filepath):
-    data = pd.read_csv(filepath)
-    return data
+    return pd.read_csv(filepath)


 def describe_data(data, name):

Function load_data refactored with the following changes:
26 changes: 13 additions & 13 deletions k_means_clustering/utils.py
@@ -32,9 +32,7 @@ def train(self, inputs):

         # and compute the new means based on the new assignments
         for i in range(self.k):
-            i_points = [p for p, a in zip(inputs, assignments) if a == i]
-
-            if i_points:
+            if i_points := [p for p, a in zip(inputs, assignments) if a == i]:
                 self.means[i] = vector_mean(i_points)

Function KMeans.train refactored with the following changes:
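The walrus operator := (an assignment expression) binds i_points and tests its truthiness in one step; note it requires Python 3.8 or newer. A minimal standalone sketch of the pattern:

    # assignment expression: bind the filtered list and test it at once
    if points := [p for p in [1, 2, 3] if p > 1]:
        print(sum(points) / len(points))  # the bound name stays in scope; prints 2.5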


@@ -104,26 +102,28 @@ def cluster_distance(cluster1, cluster2, distance_agg=min):


 def get_merge_order(cluster):
-    if is_leaf(cluster):
-        return float('inf')
-    else:
-        return cluster[0]
+    return float('inf') if is_leaf(cluster) else cluster[0]

Comment on lines -107 to +105: Function get_merge_order refactored with the following changes:


 def bottom_up_cluster(inputs, distance_agg=min):
     # start with every input leaf cluster
-    clusters = [input for input in inputs]
+    clusters = list(inputs)

     # as long as we have more than one cluster left...
     while len(clusters) > 1:
         # find the two closest clusters
-        c1, c2 = min([(cluster1, cluster2)
-                      for i, cluster1 in enumerate(clusters)
-                      for cluster2 in clusters[:i]],
-                     key=lambda p: cluster_distance(p[0], p[1], distance_agg))
+        c1, c2 = min(
+            (
+                (cluster1, cluster2)
+                for i, cluster1 in enumerate(clusters)
+                for cluster2 in clusters[:i]
+            ),
+            key=lambda p: cluster_distance(p[0], p[1], distance_agg),
+        )

         # remove them from the list of clusters
-        clusters = [c for c in clusters if c != c1 and c != c2]
+        clusters = [c for c in clusters if c not in [c1, c2]]

         # merge them, using merge_order = # of clusters left
         merged_cluster = (len(clusters), [c1, c2])

Comment on lines -115 to +126: Function bottom_up_cluster refactored with the following changes:
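The membership test c not in [c1, c2] matches the original pair of != checks, since in compares elements with == (a tuple literal (c1, c2) would avoid building a list on each iteration, but either works). A small illustration:

    c1, c2, c3 = (0, ['a']), (1, ['b']), (2, ['c'])
    clusters = [c1, c2, c3]
    assert [c for c in clusters if c not in [c1, c2]] == \
           [c for c in clusters if c != c1 and c != c2] == [c3]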
7 changes: 2 additions & 5 deletions k_nearest_neighbors/utils.py
@@ -21,10 +21,7 @@ def majority_vote(labels):
                        for count in vote_counts.values()
                        if count == winner_count])

-    if num_winners == 1:
-        return winner                      # unique winner, so return it
-    else:
-        return majority_vote(labels[:-1])  # try again without the farthest
+    return winner if num_winners == 1 else majority_vote(labels[:-1])


 def knn_classify(k, labeled_points, new_point):

Function majority_vote refactored with the following changes:

This removes the following comments:
    # unique winner, so return it
    # try again without the farthest
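The ternary keeps the tie-breaking recursion: on a tie, the farthest label (the last one, since callers pass labels ordered nearest-first) is dropped and the vote repeats. A quick check, assuming majority_vote as defined above:

    assert majority_vote(['a', 'b', 'a', 'b']) == 'a'   # 2-2 tie; drop farthest 'b', then 'a' wins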
@@ -91,7 +88,7 @@ def classify_and_plot_grid(k=1):

     plt.legend(loc=0)           # let matplotlib choose the location
     plt.axis([-130,-60,20,55])  # set the axes
-    plt.title(str(k) + "-Nearest Neighbor Programming Languages")
+    plt.title(f'{str(k)}-Nearest Neighbor Programming Languages')
     plt.show()

 #

Comment on lines -94 to +91: Function classify_and_plot_grid refactored with the following changes:
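One nit the tool misses: f-strings already call str() on interpolated values, so f'{k}-Nearest Neighbor Programming Languages' would be sufficient.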
4 changes: 2 additions & 2 deletions multiple_regression/utils.py
@@ -31,7 +31,7 @@ def total_sum_of_squares(y):


 def estimate_beta(x, y):
-    beta_initial = [random.random() for x_i in x[0]]
+    beta_initial = [random.random() for _ in x[0]]
     return minimize_stochastic(squared_error,
                                squared_error_gradient,
                                x, y,

Function estimate_beta refactored with the following changes:

@@ -99,7 +99,7 @@ def squared_error_ridge_gradient(x_i, y_i, beta, alpha):
 def estimate_beta_ridge(x, y, alpha):
     """use gradient descent to fit a ridge regression
     with penalty alpha"""
-    beta_initial = [random.random() for x_i in x[0]]
+    beta_initial = [random.random() for _ in x[0]]
     return minimize_stochastic(partial(squared_error_ridge, alpha=alpha),
                                partial(squared_error_ridge_gradient,
                                        alpha=alpha),

Function estimate_beta_ridge refactored with the following changes:
17 changes: 7 additions & 10 deletions natural_language_processing/utils.py
@@ -128,10 +128,7 @@ def random_y_given_x(x):


 def random_x_given_y(y):
-    if y <= 7:
-        return random.randrange(1, y)
-    else:
-        return random.randrange(y - 6, 7)
+    return random.randrange(1, y) if y <= 7 else random.randrange(y - 6, 7)


 def gibbs_sampling(num_iters=100):

Function random_x_given_y refactored with the following changes:
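For context, this is from the file's two-dice Gibbs-sampling example: x is the first die and y is the total of two dice, so conditioned on y the first die is uniform over the values consistent with that total. A boundary check, assuming that setup:

    random.seed(0)
    assert all(random_x_given_y(3) in (1, 2) for _ in range(100))    # total 3 -> first die is 1 or 2
    assert all(random_x_given_y(11) in (5, 6) for _ in range(100))   # total 11 -> first die is 5 or 6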
@@ -171,9 +168,7 @@ def sample_from(weights):
 topic_counts = [0 for _ in range(K)]
 document_lengths = [len(d) for d in documents]

-distinct_words = set(word
-                     for document in documents
-                     for word in document)
+distinct_words = {word for document in documents for word in document}

 W = len(distinct_words)
 D = len(documents)

Comment on lines -174 to +171: Lines 174-176 refactored with the following changes:
@@ -203,16 +198,18 @@ def choose_new_topic(d, word):


 random.seed(0)
-document_topics = [[random.randrange(K) for word in document]
-                   for document in documents]
+document_topics = [
+    [random.randrange(K) for _ in document] for document in documents
+]


 for d in range(D):
     for word, topic in zip(documents[d], document_topics[d]):
         document_topic_counts[d][topic] += 1
         topic_word_counts[topic][word] += 1
         topic_counts[topic] += 1

-for iter in range(1000):
+for _ in range(1000):
     for d in range(D):
         for i, (word, topic) in enumerate(zip(documents[d], document_topics[d])):
             # remove this word/topic from the counts

Comment on lines -206 to +204: Lines 206-215 refactored with the following changes:
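One extra benefit of the last change: the old loop variable iter shadowed the builtin iter() function; _ avoids that.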
66 changes: 28 additions & 38 deletions sonar_clf_rf.py
@@ -5,14 +5,10 @@

 def load_csv(filename):
     """This method loads a csv file"""
-    dataset = list()
+    dataset = []
     with open(filename, 'r') as file:
         csv_reader = reader(file)
-        for row in csv_reader:
-            if not row:
-                continue
-            dataset.append(row)
-
+        dataset.extend(row for row in csv_reader if row)
     return dataset


Comment on lines -8 to +11: Function load_csv refactored with the following changes:
@@ -26,10 +22,7 @@ def str_columm_to_int(dataset, column):
     """This method converts a string column to int"""
     class_values = [row[column] for row in dataset]
     unique = set(class_values)
-    lookup = dict()
-
-    for i, value in enumerate(unique):
-        lookup[value] = i
+    lookup = {value: i for i, value in enumerate(unique)}

     for row in dataset:
         row[column] = lookup[row[column]]

Comment on lines -29 to +25: Function str_columm_to_int refactored with the following changes:
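Worth noting (true of the old loop as well as the new comprehension): unique is a set, so the integer code assigned to each class label depends on set iteration order, which varies between runs for strings under hash randomization; enumerate(sorted(unique)) would make the encoding reproducible.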
@@ -39,12 +32,12 @@ def str_columm_to_int(dataset, column):

 def cross_validation_split(dataset, k_folds):
     """This method splits a dataset into k folds"""
-    dataset_split = list()
+    dataset_split = []
     dataset_copy = list(dataset)
     fold_size = int(len(dataset) / k_folds)

-    for i in range(k_folds):
-        fold = list()
+    for _ in range(k_folds):
+        fold = []
         while(len(fold) < fold_size):
             index = randrange(len(dataset_copy))
             fold.append(dataset_copy.pop(index))

Comment on lines -42 to +40: Function cross_validation_split refactored with the following changes:
@@ -55,25 +48,21 @@ def cross_validation_split(dataset, k_folds):

 def accuracy_score(actual, predicted):
     """This method predicts the accuracy percentage"""
-    correct = 0
-    for i in range(len(actual)):
-        if actual[i] == predicted[i]:
-            correct += 1
-
+    correct = sum(actual[i] == predicted[i] for i in range(len(actual)))
     return correct / float(len(actual)) * 100.0

Comment on lines -58 to +51: Function accuracy_score refactored with the following changes:


 def evaluate_algorithm(dataset, algorithm, k_folds, *args):
     """This method evaluates the algorithm using a cross validation split"""
     folds = cross_validation_split(dataset, k_folds)
-    scores = list()
+    scores = []

     for fold in folds:
         train_set = list(folds)
         train_set.remove(fold)
         train_set = sum(train_set, [])

-        test_set = list()
+        test_set = []

         for row in fold:
             row_copy = list(row)

Comment on lines -69 to +65: Function evaluate_algorithm refactored with the following changes:
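The accuracy_score rewrite above sums a generator of comparisons directly; this works because bool is a subclass of int, so each True counts as 1. For instance:

    assert sum(a == p for a, p in zip([0, 1, 1, 0], [0, 1, 0, 0])) == 3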
@@ -105,7 +94,7 @@ def test_split(index, value, dataset):
 def gini_index(groups, classes):
     """This method calculates the gini index for a split dataset"""
     # count all samples at split point
-    n_instances = float(sum([len(group) for group in groups]))
+    n_instances = float(sum(len(group) for group in groups))
     # sum weighted gini index for each group
     gini = 0.0
     for group in groups:

Comment on lines -108 to +97: Function gini_index refactored with the following changes:

def get_split(dataset, n_features):
"""This method selects the best split for the dataset"""
class_values = list(set(row[-1] for row in dataset))
class_values = list({row[-1] for row in dataset})
b_index, b_value, b_score, b_groups = 999, 999, 999, None
features = list()
features = []
Comment on lines -129 to +120
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function get_split refactored with the following changes:


while len(features) < n_features :
index = randrange(len(dataset[0]) - 1)
@@ -189,21 +178,22 @@ def build_tree(train, max_depth, min_size, n_features):

 def predict(node, row):
     """This method makes a prediction with a decision tree"""
-    if row[node['index']] < node['value']:
-        if isinstance(node['left'], dict):
-            return predict(node['left'], row)
-        else:
-            return node['left']
-    else:
-        if isinstance(node['right'], dict):
-            return predict(node['right'], row)
-        else:
-            return node['right']
+    if row[node['index']] >= node['value']:
+        return (
+            predict(node['right'], row)
+            if isinstance(node['right'], dict)
+            else node['right']
+        )
+
+    if isinstance(node['left'], dict):
+        return predict(node['left'], row)
+    else:
+        return node['left']

Comment on lines -192 to +191: Function predict refactored with the following changes:


 def subsample(dataset, ratio):
     """This method creates a random subsample from the dataset with replacement"""
-    sample = list()
+    sample = []
     n_sample = round(len(dataset) * ratio)
     while len(sample) < n_sample:
         index = randrange(len(dataset))

Comment on lines -206 to +196: Function subsample refactored with the following changes:
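The inverted guard in predict is equivalent to the original nested if/else. A tiny smoke test using a one-split stump (illustrative values):

    stump = {'index': 0, 'value': 5.0, 'left': 'rock', 'right': 'mine'}
    assert predict(stump, [3.2]) == 'rock'   # 3.2 < 5.0  -> left branch
    assert predict(stump, [7.8]) == 'mine'   # 7.8 >= 5.0 -> right branch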
@@ -219,22 +209,22 @@ def bagging_predict(trees, row):

 def random_forest(train, test, max_depth, min_size, sample_size, n_trees, n_features):
     """Random Forest Algorithm"""
-    trees = list()
-    for i in range(n_trees):
+    trees = []
+    for _ in range(n_trees):
         sample = subsample(train, sample_size)
         tree = build_tree(sample, max_depth, min_size, n_features)
         trees.append(tree)
-    predictions = [bagging_predict(trees, row) for row in test]
-    return predictions
+    return [bagging_predict(trees, row) for row in test]

Comment on lines -222 to +217: Function random_forest refactored with the following changes:


 """Test run the algorithm"""

 seed(2)
 # load and prepare the data
 filename = "/home/amogh/PycharmProjects/deeplearning/indie_projects/sonar_data.csv"
 dataset = load_csv(filename)
 # convert string attributes to integers
-for i in range(0, len(dataset[0]) - 1):
+for i in range(len(dataset[0]) - 1):
     str_column_to_float(dataset, i)
 # convert class columns to integers
 str_columm_to_int(dataset, len(dataset[0]) - 1)

Comment on lines +221 to +227: Lines 237-237 refactored with the following changes:
5 changes: 1 addition & 4 deletions working_with_data/utils.py
@@ -145,10 +145,7 @@ def parse_row(input_row, parsers):
 def try_parse_field(field_name, value, parser_dict):
     """try to parse value using the appropriate function from parser_dict"""
     parser = parser_dict.get(field_name)  # None if no such entry
-    if parser is not None:
-        return try_or_none(parser)(value)
-    else:
-        return value
+    return try_or_none(parser)(value) if parser is not None else value


 def parse_dict(input_dict, parser_dict):

Function try_parse_field refactored with the following changes:
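The ternary keeps the fallthrough: fields without a registered parser come back unchanged. A quick illustration, assuming try_or_none from this file (it wraps a parser to return None on failure):

    import datetime
    parser_dict = {'date': lambda s: datetime.datetime.strptime(s, '%m/%d/%Y').date()}
    assert try_parse_field('date', '6/20/2014', parser_dict) == datetime.date(2014, 6, 20)
    assert try_parse_field('symbol', 'AAPL', parser_dict) == 'AAPL'  # no parser -> raw value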