-
Notifications
You must be signed in to change notification settings - Fork 0
Sourcery refactored master branch #1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -166,7 +166,7 @@ def maximize_stochastic(target_fn, gradient_fn, x, y, theta_0, alpha_0=0.01): | |
|
|
||
| print("using the gradient") | ||
|
|
||
| v = [random.randint(-10, 10) for i in range(3)] | ||
| v = [random.randint(-10, 10) for _ in range(3)] | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Lines
|
||
|
|
||
| tolerance = 0.0000001 | ||
|
|
||
|
|
@@ -183,7 +183,7 @@ def maximize_stochastic(target_fn, gradient_fn, x, y, theta_0, alpha_0=0.01): | |
| print() | ||
| print("using minimize_batch") | ||
|
|
||
| v = [random.randint(-10, 10) for i in range(3)] | ||
| v = [random.randint(-10, 10) for _ in range(3)] | ||
|
|
||
| v = minimize_batch(sum_of_squares, sum_of_squares_gradient, v) | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -121,8 +121,8 @@ def make_hist(p, n, num_points): | |
| older = random_kid() | ||
| if older == "girl": | ||
| older_girl += 1 | ||
| if older == "girl" and younger == "girl": | ||
| both_girls += 1 | ||
| if younger == "girl": | ||
| both_girls += 1 | ||
|
Comment on lines
-124
to
+125
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Lines
|
||
| if older == "girl" or younger == "girl": | ||
| either_girl += 1 | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -50,11 +50,10 @@ def median(v): | |
| if n % 2 == 1: | ||
| # if odd, return the middle value | ||
| return sorted_v[midpoint] | ||
| else: | ||
| # if even, return the average of the middle values | ||
| lo = midpoint - 1 | ||
| hi = midpoint | ||
| return (sorted_v[lo] + sorted_v[hi]) / 2 | ||
| # if even, return the average of the middle values | ||
| lo = midpoint - 1 | ||
| hi = midpoint | ||
| return (sorted_v[lo] + sorted_v[hi]) / 2 | ||
|
Comment on lines
-53
to
+56
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
|
||
|
|
||
| def quantile(x, p): | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -13,8 +13,7 @@ | |
|
|
||
|
|
||
| def load_data(filepath): | ||
| data = pd.read_csv(filepath) | ||
| return data | ||
| return pd.read_csv(filepath) | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
|
||
|
|
||
| def describe_data(data, name): | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -32,9 +32,7 @@ def train(self, inputs): | |
|
|
||
| # and compute the new means based on the new assignments | ||
| for i in range(self.k): | ||
| i_points = [p for p, a in zip(inputs, assignments) if a == i] | ||
|
|
||
| if i_points: | ||
| if i_points := [p for p, a in zip(inputs, assignments) if a == i]: | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
| self.means[i] = vector_mean(i_points) | ||
|
|
||
|
|
||
|
|
@@ -104,26 +102,28 @@ def cluster_distance(cluster1, cluster2, distance_agg=min): | |
|
|
||
|
|
||
| def get_merge_order(cluster): | ||
| if is_leaf(cluster): | ||
| return float('inf') | ||
| else: | ||
| return cluster[0] | ||
| return float('inf') if is_leaf(cluster) else cluster[0] | ||
|
Comment on lines
-107
to
+105
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
|
||
|
|
||
| def bottom_up_cluster(inputs, distance_agg=min): | ||
| # start with every input leaf cluster | ||
| clusters = [input for input in inputs] | ||
| clusters = list(inputs) | ||
|
|
||
| # as long as we have more than one cluster left... | ||
| while len(clusters) > 1: | ||
| # find the two closest clusters | ||
| c1, c2 = min([(cluster1, cluster2) | ||
| for i, cluster1 in enumerate(clusters) | ||
| for cluster2 in clusters[:i]], | ||
| key=lambda p: cluster_distance(p[0], p[1], distance_agg)) | ||
| c1, c2 = min( | ||
| ( | ||
| (cluster1, cluster2) | ||
| for i, cluster1 in enumerate(clusters) | ||
| for cluster2 in clusters[:i] | ||
| ), | ||
| key=lambda p: cluster_distance(p[0], p[1], distance_agg), | ||
| ) | ||
|
|
||
|
|
||
| # remove them from the list of clusters | ||
| clusters = [c for c in clusters if c != c1 and c != c2] | ||
| clusters = [c for c in clusters if c not in [c1, c2]] | ||
|
Comment on lines
-115
to
+126
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
|
||
| # merge them, using merge_order = # of clusters left | ||
| merged_cluster = (len(clusters), [c1, c2]) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,10 +21,7 @@ def majority_vote(labels): | |
| for count in vote_counts.values() | ||
| if count == winner_count]) | ||
|
|
||
| if num_winners == 1: | ||
| return winner # unique winner, so return it | ||
| else: | ||
| return majority_vote(labels[:-1]) # try again without the farthest | ||
| return winner if num_winners == 1 else majority_vote(labels[:-1]) | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
This removes the following comments (why?): |
||
|
|
||
|
|
||
| def knn_classify(k, labeled_points, new_point): | ||
|
|
@@ -91,7 +88,7 @@ def classify_and_plot_grid(k=1): | |
|
|
||
| plt.legend(loc=0) # let matplotlib choose the location | ||
| plt.axis([-130,-60,20,55]) # set the axes | ||
| plt.title(str(k) + "-Nearest Neighbor Programming Languages") | ||
| plt.title(f'{str(k)}-Nearest Neighbor Programming Languages') | ||
|
Comment on lines
-94
to
+91
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
| plt.show() | ||
|
|
||
| # | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -31,7 +31,7 @@ def total_sum_of_squares(y): | |
|
|
||
|
|
||
| def estimate_beta(x, y): | ||
| beta_initial = [random.random() for x_i in x[0]] | ||
| beta_initial = [random.random() for _ in x[0]] | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
| return minimize_stochastic(squared_error, | ||
| squared_error_gradient, | ||
| x, y, | ||
|
|
@@ -99,7 +99,7 @@ def squared_error_ridge_gradient(x_i, y_i, beta, alpha): | |
| def estimate_beta_ridge(x, y, alpha): | ||
| """use gradient descent to fit a ridge regression | ||
| with penalty alpha""" | ||
| beta_initial = [random.random() for x_i in x[0]] | ||
| beta_initial = [random.random() for _ in x[0]] | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
| return minimize_stochastic(partial(squared_error_ridge, alpha=alpha), | ||
| partial(squared_error_ridge_gradient, | ||
| alpha=alpha), | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -128,10 +128,7 @@ def random_y_given_x(x): | |
|
|
||
|
|
||
| def random_x_given_y(y): | ||
| if y <= 7: | ||
| return random.randrange(1, y) | ||
| else: | ||
| return random.randrange(y - 6, 7) | ||
| return random.randrange(1, y) if y <= 7 else random.randrange(y - 6, 7) | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
|
||
|
|
||
| def gibbs_sampling(num_iters=100): | ||
|
|
@@ -171,9 +168,7 @@ def sample_from(weights): | |
| topic_counts = [0 for _ in range(K)] | ||
| document_lengths = [len(d) for d in documents] | ||
|
|
||
| distinct_words = set(word | ||
| for document in documents | ||
| for word in document) | ||
| distinct_words = {word for document in documents for word in document} | ||
|
Comment on lines
-174
to
+171
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Lines
|
||
|
|
||
| W = len(distinct_words) | ||
| D = len(documents) | ||
|
|
@@ -203,16 +198,18 @@ def choose_new_topic(d, word): | |
|
|
||
|
|
||
| random.seed(0) | ||
| document_topics = [[random.randrange(K) for word in document] | ||
| for document in documents] | ||
| document_topics = [ | ||
| [random.randrange(K) for _ in document] for document in documents | ||
| ] | ||
|
|
||
|
Comment on lines
-206
to
+204
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Lines
|
||
|
|
||
| for d in range(D): | ||
| for word, topic in zip(documents[d], document_topics[d]): | ||
| document_topic_counts[d][topic] += 1 | ||
| topic_word_counts[topic][word] += 1 | ||
| topic_counts[topic] += 1 | ||
|
|
||
| for iter in range(1000): | ||
| for _ in range(1000): | ||
| for d in range(D): | ||
| for i, (word, topic) in enumerate(zip(documents[d], document_topics[d])): | ||
| # remove this word/topic from the counts | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,14 +5,10 @@ | |
|
|
||
| def load_csv(filename): | ||
| """This method loads a csv file""" | ||
| dataset = list() | ||
| dataset = [] | ||
| with open(filename, 'r') as file: | ||
| csv_reader = reader(file) | ||
| for row in csv_reader: | ||
| if not row: | ||
| continue | ||
| dataset.append(row) | ||
|
|
||
| dataset.extend(row for row in csv_reader if row) | ||
|
Comment on lines
-8
to
+11
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
| return dataset | ||
|
|
||
|
|
||
|
|
@@ -26,10 +22,7 @@ def str_columm_to_int(dataset, column): | |
| """This method converts a string column to int""" | ||
| class_values = [row[column] for row in dataset] | ||
| unique = set(class_values) | ||
| lookup = dict() | ||
|
|
||
| for i, value in enumerate(unique): | ||
| lookup[value] = i | ||
| lookup = {value: i for i, value in enumerate(unique)} | ||
|
Comment on lines
-29
to
+25
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
|
||
| for row in dataset: | ||
| row[column] = lookup[row[column]] | ||
|
|
@@ -39,12 +32,12 @@ def str_columm_to_int(dataset, column): | |
|
|
||
| def cross_validation_split(dataset, k_folds): | ||
| """This method splits a dataset into k folds""" | ||
| dataset_split = list() | ||
| dataset_split = [] | ||
| dataset_copy = list(dataset) | ||
| fold_size = int(len(dataset) / k_folds) | ||
|
|
||
| for i in range(k_folds): | ||
| fold = list() | ||
| for _ in range(k_folds): | ||
| fold = [] | ||
|
Comment on lines
-42
to
+40
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
| while(len(fold) < fold_size): | ||
| index = randrange(len(dataset_copy)) | ||
| fold.append(dataset_copy.pop(index)) | ||
|
|
@@ -55,25 +48,21 @@ def cross_validation_split(dataset, k_folds): | |
|
|
||
| def accuracy_score(actual, predicted): | ||
| """This method predicts the accuracy percentage""" | ||
| correct = 0 | ||
| for i in range(len(actual)): | ||
| if actual[i] == predicted[i]: | ||
| correct += 1 | ||
|
|
||
| correct = sum(actual[i] == predicted[i] for i in range(len(actual))) | ||
|
Comment on lines
-58
to
+51
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
| return correct / float(len(actual)) * 100.0 | ||
|
|
||
|
|
||
| def evaluate_algorithm(dataset, algorithm, k_folds, *args): | ||
| """This method evaluates the algorithm using a cross validation split""" | ||
| folds = cross_validation_split(dataset, k_folds) | ||
| scores = list() | ||
| scores = [] | ||
|
|
||
| for fold in folds: | ||
| train_set = list(folds) | ||
| train_set.remove(fold) | ||
| train_set = sum(train_set, []) | ||
|
|
||
| test_set = list() | ||
| test_set = [] | ||
|
Comment on lines
-69
to
+65
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
|
||
| for row in fold: | ||
| row_copy = list(row) | ||
|
|
@@ -105,7 +94,7 @@ def test_split(index, value, dataset): | |
| def gini_index(groups, classes): | ||
| """This method calculates the gini index for a split dataset""" | ||
| # count all samples at split point | ||
| n_instances = float(sum([len(group) for group in groups])) | ||
| n_instances = float(sum(len(group) for group in groups)) | ||
|
Comment on lines
-108
to
+97
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
| # sum weighted gini index for each group | ||
| gini = 0.0 | ||
| for group in groups: | ||
|
|
@@ -126,9 +115,9 @@ def gini_index(groups, classes): | |
|
|
||
| def get_split(dataset, n_features): | ||
| """This method selects the best split for the dataset""" | ||
| class_values = list(set(row[-1] for row in dataset)) | ||
| class_values = list({row[-1] for row in dataset}) | ||
| b_index, b_value, b_score, b_groups = 999, 999, 999, None | ||
| features = list() | ||
| features = [] | ||
|
Comment on lines
-129
to
+120
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
|
||
| while len(features) < n_features : | ||
| index = randrange(len(dataset[0]) - 1) | ||
|
|
@@ -189,21 +178,22 @@ def build_tree(train, max_depth, min_size, n_features): | |
|
|
||
| def predict(node, row): | ||
| """This method makes a prediction with a decision tree""" | ||
| if row[node['index']] < node['value']: | ||
| if isinstance(node['left'], dict): | ||
| return predict(node['left'], row) | ||
| else: | ||
| return node['left'] | ||
| if row[node['index']] >= node['value']: | ||
| return ( | ||
| predict(node['right'], row) | ||
| if isinstance(node['right'], dict) | ||
| else node['right'] | ||
| ) | ||
|
|
||
| if isinstance(node['left'], dict): | ||
| return predict(node['left'], row) | ||
| else: | ||
| if isinstance(node['right'], dict): | ||
| return predict(node['right'], row) | ||
| else: | ||
| return node['right'] | ||
| return node['left'] | ||
|
Comment on lines
-192
to
+191
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
|
||
|
|
||
| def subsample(dataset, ratio): | ||
| """This method creates a random subsample from the dataset with replacement""" | ||
| sample = list() | ||
| sample = [] | ||
|
Comment on lines
-206
to
+196
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
| n_sample = round(len(dataset) * ratio) | ||
| while len(sample) < n_sample: | ||
| index = randrange(len(dataset)) | ||
|
|
@@ -219,22 +209,22 @@ def bagging_predict(trees, row): | |
|
|
||
| def random_forest(train, test, max_depth, min_size, sample_size, n_trees, n_features): | ||
| """Random Forest Algorithm""" | ||
| trees = list() | ||
| for i in range(n_trees): | ||
| trees = [] | ||
| for _ in range(n_trees): | ||
| sample = subsample(train, sample_size) | ||
| tree = build_tree(sample, max_depth, min_size, n_features) | ||
| trees.append(tree) | ||
| predictions = [bagging_predict(trees, row) for row in test] | ||
| return predictions | ||
| return [bagging_predict(trees, row) for row in test] | ||
|
Comment on lines
-222
to
+217
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
|
||
|
|
||
| """Test run the algorithm""" | ||
|
|
||
| seed(2) | ||
| # load and prepare the data | ||
| filename = "/home/amogh/PycharmProjects/deeplearning/indie_projects/sonar_data.csv" | ||
| dataset = load_csv(filename) | ||
| # convert string attributes to integers | ||
| for i in range(0, len(dataset[0]) - 1): | ||
| for i in range(len(dataset[0]) - 1): | ||
|
Comment on lines
+221
to
+227
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Lines
|
||
| str_column_to_float(dataset, i) | ||
| # convert class columns to integers | ||
| str_columm_to_int(dataset, len(dataset[0]) - 1) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -145,10 +145,7 @@ def parse_row(input_row, parsers): | |
| def try_parse_field(field_name, value, parser_dict): | ||
| """try to parse value using the appropriate function from parser_dict""" | ||
| parser = parser_dict.get(field_name) # None if no such entry | ||
| if parser is not None: | ||
| return try_or_none(parser)(value) | ||
| else: | ||
| return value | ||
| return try_or_none(parser)(value) if parser is not None else value | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
|
||
|
|
||
| def parse_dict(input_dict, parser_dict): | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Function `predict_paid_or_unpaid` refactored with the following changes:
- Merge duplicate blocks in conditional (`merge-duplicate-blocks`)
- Remove redundant conditional (`remove-redundant-if`)