From 7e0b370fbcc8d157a6b9caebefacf226c109944d Mon Sep 17 00:00:00 2001
From: michaelhyatt
Date: Sat, 21 Oct 2017 17:39:31 +1100
Subject: [PATCH] Handle ValueError by retrying securities individually

When a batched Yahoo read raises ValueError, fall back to fetching each
security in the batch on its own, so one bad symbol no longer discards
the whole batch. Also bump the batch size from 1 to 15.
---
 scraper.py | 29 ++++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/scraper.py b/scraper.py
index 77c8617..54ed09d 100644
--- a/scraper.py
+++ b/scraper.py
@@ -52,7 +52,7 @@ secs = df_asx300secs.Code.values
 
 # Load from Yahoo
 
-num = 1
+num = 15
 number_to_read = int(os.environ['MORPH_NUMBER_TO_LOAD']) if 'MORPH_NUMBER_TO_LOAD' in os.environ else len(secs)
 
 for index in range(0, number_to_read, num):
@@ -71,7 +71,19 @@
     except RemoteDataError:
         print "RemoteDataError"
     except ValueError:
-        print "ValueError"
+        print "ValueError, switching to individuals"
+
+        for single_sec in codes:
+            try:
+                data = web.DataReader(single_sec, 'yahoo', FROM_DATE, TO_DATE)
+
+                if pricing_panel is None:
+                    pricing_panel = data
+                else:
+                    pricing_panel = pd.concat([pricing_panel, data], axis=2)
+            except (RemoteDataError, ValueError):
+                print 'Error retrieving %s' % single_sec
+
     except InvalidIndexError:
         print "InvalidIndexError"
 
@@ -90,9 +102,11 @@
 MY_MAV_TIME_PERIOD = int(os.environ['MORPH_MY_MAV_TIME_PERIOD'])
 
 for sec in pricing_data.keys():
-    pricing_data[sec]["MY_MAV"] = pricing_data[sec]["Close"].fillna(method='ffill').rolling(window=MY_MAV_TIME_PERIOD, center=False).mean()
-    pricing_data[sec]["MY_SHORT_MAV"] = pricing_data[sec]["Close"].fillna(method='ffill').rolling(window=MY_SHORT_MAV_TIME_PERIOD,
-                                                                                                  center=False).mean()
+    pricing_data[sec]["MY_MAV"] = pricing_data[sec]["Close"].fillna(method='ffill').rolling(window=MY_MAV_TIME_PERIOD,
+                                                                                            center=False).mean()
+    pricing_data[sec]["MY_SHORT_MAV"] = pricing_data[sec]["Close"].fillna(method='ffill').rolling(
+        window=MY_SHORT_MAV_TIME_PERIOD,
+        center=False).mean()
     pricing_data[sec]["MY_RSI"] = pricing_data[sec]["MY_SHORT_MAV"] - pricing_data[sec]["MY_MAV"]
     pricing_data[sec]["MY_RSI_RANK"] = pricing_data[sec]["MY_RSI"].rank(pct=True, method='average').round(2) - 0.01
     pricing_data[sec]["Days_Over_Under"] = np.where(pricing_data[sec]["MY_SHORT_MAV"] > pricing_data[sec]["MY_MAV"], 1,
@@ -103,7 +117,6 @@
                                                     0) * 50
     pricing_data[sec]["Rounded_Days"] = (pricing_data[sec]["Days"] / 10).round(0) * 10
 
-
 columns = []
 columns.extend(df_asx300secs.columns)
 columns.extend(["URL", "extraction_date", "extracted_on"])
@@ -127,18 +140,16 @@
 sorted_winners1 = winners_vs_20.sort_values(by=["MY_RSI_RANK", "Days_x_Ratio"],
                                             ascending=False)
 
-
 # Apply some filtering to remove noisy stocks
 sorted_winners2 = sorted_winners1[
     (sorted_winners1["Volume"] > int(os.environ['MORPH_VOLUME_CUTOVER'])) &
     (sorted_winners1["Close"] > float(os.environ['MORPH_CLOSE_CUTOVER']))
-]
+    ]
 
 sorted_winners = sorted_winners2[["extraction_date", "Code", "Company", "Industry group", "URL", "MY_RSI_RANK",
                                   "Days", "Days_x_Ratio", "Rounded_Days", "extracted_on", "Volume", "Close",
                                   "MY_MAV", "MY_SHORT_MAV"]]
 
-
 # Save in the database
 for index, row in sorted_winners.iterrows():
     scraperwiki.sqlite.save(unique_keys=['Code', 'extraction_date'], data=row.to_dict())
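
Note on the fallback path: with 2017-era pandas-datareader, DataReader returns
a Panel for a list of symbols but a plain DataFrame for a single string, so
concatenating a lone security into pricing_panel along axis=2 may fail. Below
is a minimal sketch of the per-security retry, not the commit itself: it
assumes the old yahoo backend and wraps the symbol in a one-element list to
keep the Panel shape. The codes and dates are illustrative.

    # Sketch only: [single_sec] keeps the return type a Panel, matching the
    # batch path, so the concat along the symbol axis (axis=2) stays valid.
    import pandas as pd
    import pandas_datareader.data as web
    from pandas_datareader._utils import RemoteDataError

    FROM_DATE, TO_DATE = '2017-01-01', '2017-10-21'   # illustrative dates
    codes = ['CBA.AX', 'BHP.AX', 'WES.AX']            # illustrative ASX codes

    pricing_panel = None
    for single_sec in codes:
        try:
            data = web.DataReader([single_sec], 'yahoo', FROM_DATE, TO_DATE)
            if pricing_panel is None:
                pricing_panel = data
            else:
                # minor_axis (axis=2) holds the security codes in a yahoo Panel
                pricing_panel = pd.concat([pricing_panel, data], axis=2)
        except (RemoteDataError, ValueError):
            print 'Error retrieving %s' % single_sec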
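
The hunk at scraper.py lines 90-102 only reflows the moving-average code, but
the computation is easy to miss in the wrapped lines: forward-fill the closes,
take a long and a short rolling mean, and rank their difference as a
percentile. A toy illustration of the same logic with made-up prices, where
the window sizes stand in for the MORPH_* environment settings:

    import pandas as pd

    MY_MAV_TIME_PERIOD = 4        # illustrative window sizes
    MY_SHORT_MAV_TIME_PERIOD = 2

    df = pd.DataFrame({'Close': [10.0, 10.5, None, 11.2, 11.0, 11.6, 12.1, 11.9]})
    filled = df['Close'].fillna(method='ffill')  # carry the last price over gaps

    df['MY_MAV'] = filled.rolling(window=MY_MAV_TIME_PERIOD, center=False).mean()
    df['MY_SHORT_MAV'] = filled.rolling(window=MY_SHORT_MAV_TIME_PERIOD, center=False).mean()
    df['MY_RSI'] = df['MY_SHORT_MAV'] - df['MY_MAV']
    # Percentile rank of the spread, nudged just under 1.0 as in the scraper
    df['MY_RSI_RANK'] = df['MY_RSI'].rank(pct=True, method='average').round(2) - 0.01

    print df[['MY_MAV', 'MY_SHORT_MAV', 'MY_RSI', 'MY_RSI_RANK']]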
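
The final hunks only tidy whitespace around the cutoff filter and the save
loop. For reference, scraperwiki.sqlite.save upserts on the listed unique
keys, so re-running the scraper for the same extraction date replaces rows
rather than duplicating them. A self-contained sketch with made-up rows and
fallback cutoffs (the real scraper reads MORPH_VOLUME_CUTOVER and
MORPH_CLOSE_CUTOVER from the environment with no defaults):

    import os
    import pandas as pd
    import scraperwiki

    # Illustrative frame standing in for sorted_winners1
    sorted_winners1 = pd.DataFrame({
        'Code': ['CBA.AX', 'XYZ.AX'],
        'extraction_date': ['2017-10-21', '2017-10-21'],
        'Volume': [2500000, 40000],
        'Close': [75.30, 0.08],
    })

    volume_cutover = int(os.environ.get('MORPH_VOLUME_CUTOVER', '100000'))
    close_cutover = float(os.environ.get('MORPH_CLOSE_CUTOVER', '1.0'))

    # Drop thinly traded and penny stocks before saving
    sorted_winners = sorted_winners1[
        (sorted_winners1['Volume'] > volume_cutover) &
        (sorted_winners1['Close'] > close_cutover)
    ]

    for index, row in sorted_winners.iterrows():
        # save() upserts: a repeated (Code, extraction_date) pair is replaced
        scraperwiki.sqlite.save(unique_keys=['Code', 'extraction_date'], data=row.to_dict())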