In [1]:
from datetime import datetime
from io import StringIO

import numpy as np
import pandas as pd
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

In [2]:
# Take one timestamp of the current local time; its day, month and year
# (not zero-padded) are embedded in the name of the dated CSV file.
now = datetime.now()

# Fixed name for the "latest" copy, plus the dated name for this run.
stocks_data_fname_lastest = '../assets/stocks_raw_data_latest.csv'
stocks_data_fname_cur = '../assets/stocks_raw_data_{}_{}_{}.csv'.format(
    now.day, now.month, now.year)

In [4]:
# Page on calcalist.co.il that the browser session below is pointed at.
url = 'http://www.calcalist.co.il/stocks/home/0,7340,L-4021,00.html'

# Upper bound, in seconds, passed to every explicit WebDriverWait later on.
max_wait_time = 10

# Start Firefox, drop any cookies, then load the page.
driver = webdriver.Firefox()
driver.delete_all_cookies()
driver.get(url)

# Implicit wait (seconds) for element look-ups performed from here on.
driver.implicitly_wait(2)

In [5]:
# Remove all filter params except market cap by clicking the link inside each
# of the listed filter rows.
# Each link is waited on until it is *clickable*, not merely present in the
# DOM: an element that is present but still hidden or disabled makes .click()
# raise instead of doing anything useful.
for filter_id in ('stock_filter_2', 'stock_filter_9', 'stock_filter_29'):
    WebDriverWait(driver, max_wait_time).until(
        EC.element_to_be_clickable(
            (By.XPATH, '//*[@id="{}"]/div/a'.format(filter_id)))).click()

# click on "show results"
results_button = WebDriverWait(driver, max_wait_time).until(
    EC.element_to_be_clickable(
        (By.XPATH, '//*[@id="AmountSection"]/div/div/div/a')))
results_button.click()

# Get the session token: it is embedded in the href of a link inside the
# results area. Only an attribute is read here (no click), so waiting for the
# element to be present is sufficient.
export_link = WebDriverWait(driver, max_wait_time).until(
    EC.presence_of_element_located(
        (By.XPATH, '//*[@id="StockResultsArea"]/div[4]/a[2]')))
sess_token = export_link.get_attribute(name='href')

In [6]:
# Parse the token string taken from the link's href:
#   1. keep only what sits between the JavaScript call prefix and suffix,
#   2. swap the short "1*MARKET_VALUE" list for the long dash-separated one
#      (presumably the IDs of the columns to export -- TODO confirm),
#   3. cut off the placeholder query string.
js_call_prefix = "javascript:SendPrintAndExel('/"
js_call_suffix = "',%20'excel',%200,%20'SelectTitle')"
placeholder_query = '?minmax=replaceminmax&anaf=repanaf&hideParams=REPLACEHIDEPARAMS'
expanded_params = "1-2-5-6-7-8-9-12-14-17-18-19-20-21-22-23-24-25-26-27-30-28-31-34-35-29*MARKET_VALUE"

sess_token = sess_token.split(js_call_prefix)[1].split(js_call_suffix)[0]
sess_token = sess_token.replace("1*MARKET_VALUE", expanded_params)
sess_token = sess_token.split(placeholder_query)[0]

# Site root plus the cleaned relative path is the download URL.
data_download_url = "http://www.calcalist.co.il/" + sess_token

In [7]:
# Get the data as a table.
# The browser is shut down in `finally` so that a failed download does not
# leave a Firefox/driver process behind. quit() ends the whole WebDriver
# session, whereas close() only closes the current window.
try:
    # Without a timeout, requests waits indefinitely if the server stalls.
    data_file = requests.get(data_download_url, timeout=30)
    # Fail loudly on an HTTP error instead of parsing an error page as data.
    data_file.raise_for_status()
finally:
    driver.quit()
data_file.encoding = 'UTF-8'

# Read the table as a pandas DataFrame; the last table in the document is used.
# The text is wrapped in StringIO because passing literal HTML to read_html is
# deprecated in recent pandas versions.
stocks_data = pd.read_html(StringIO(data_file.text))[-1]
stocks_data.columns = stocks_data.iloc[0]  # first row holds the column names
stocks_data = stocks_data.reindex(stocks_data.index.drop(0))  # drop the name row

In [8]:
# Turn the 'N/A' and 'N/A%' markers into NaN.
# DataFrame.replace returns a new frame, so the result has to be assigned
# back; calling it without assignment leaves stocks_data unchanged.
# np.nan is used because the np.NaN alias no longer exists in NumPy 2.0.
stocks_data = stocks_data.replace(['N/A', 'N/A%'], np.nan)

# Preview the first rows as the cell output.
stocks_data.head(10)

Unnamed: 0,שם מניה,שווי שוק,EPS-רווח למניה,רווח גולמי למכירות,רווח תפעולי למכירות,רווח לפני מס למכירות,רווח נקי למכירות,תשואה על ההון העצמי,תשואה על ההשקעה,תשואה על הנכסים,...,% תשואה 3 חודשים אחרונים,% תשואה מתחילת השנה,% תשואה 12 חודשים אחרונים,מכפיל תזרים,מכפיל הון,מכפיל מכירות,הון עצמי למאזן,יחס שוטף,מנוף פיננסי,מכפיל רווח
1,טבע,117663,2.1,0.50,0.16,0.12,0.11,1.9,0.01,0.01,...,2.8%,-16.2%,-45.5%,,0.95,1.4,0.37,0.93,0.61,97.0
2,מיילן,69089,0.44,0.40,0.08,0.03,0.02,0.57,0.00,0.00,...,4.0%,-3.5%,-22.2%,,1.6,1.6,0.34,1.6,0.66,34.0
3,פריגו,37292,1.8,0.39,0.11,0.08,0.06,1.2,0.01,0.01,...,8.9%,-16.9%,-27.1%,,1.7,1.9,0.43,2.1,0.57,-2.8
4,פועלים,33473,0.58,,,,,2.2,,0.00,...,11.8%,9.6%,26.3%,1.4,0.96,,0.08,,,12.3
5,לאומי,26890,0.41,,,,,1.9,,0.00,...,5.0%,11.3%,25.8%,1.9,0.83,,0.07,,,9.1
6,עזריאלי,23733,2.0,0.61,0.49,0.46,0.36,1.6,0.01,0.01,...,0.92%,17.1%,16.5%,17.4,1.6,9.6,0.51,0.87,0.49,17.2
7,כיל,21520,19.3,0.28,0.09,0.08,0.05,2.6,0.01,0.01,...,11.5%,6.7%,5.2%,,2.3,1.1,0.30,1.3,0.69,-45.7
8,אלביט מערכות,19170,3.9,0.30,0.08,0.07,0.06,2.9,0.02,0.01,...,5.3%,15.0%,21.6%,,3.3,1.5,0.36,1.3,0.64,21.8
9,נייס,16805,1.1,0.62,0.09,0.06,0.06,1.1,0.01,0.01,...,12.3%,6.1%,12.9%,,2.9,4.0,0.59,1.3,0.41,44.2
10,דלק קידוחים יהש,15917,0.27,0.78,0.76,0.67,0.64,11.1,0.03,0.03,...,6.5%,-3.8%,-2.2%,23.2,12.3,17.4,0.25,4.7,0.75,25.3


In [9]:
def format_percent_and_float(x):
    """Convert a single table cell to a float.

    Strings are parsed with ',' treated as the decimal separator. A string
    containing '%' is parsed as a percentage and returned as a fraction
    ('25%' -> 0.25). Values that are already real numbers are returned as
    float, so applying the function a second time does not wipe the data.

    Parameters
    ----------
    x : str, number or None
        Raw cell value.

    Returns
    -------
    float
        The parsed value, or NaN when `x` is missing or cannot be parsed
        (for example 'N/A', 'N/A%', '' or None).
    """
    # Already numeric: pass through. bool is excluded because it is an int
    # subclass but not a meaningful quantity for this conversion.
    if isinstance(x, (int, float, np.integer, np.floating)) and not isinstance(x, bool):
        return float(x)
    if not isinstance(x, str):
        return np.nan

    text = x.replace(',', '.')
    try:
        if '%' in text:
            return float(text.strip('%')) / 100
        return float(text)
    except ValueError:
        # Only parse failures are swallowed; anything else should surface.
        return np.nan

In [10]:
# Columns that are left exactly as they are (no conversion).
untouched_cols = ('שם מניה', 'שווי שוק')
# Columns whose converted value is additionally divided by 100.
rescaled_cols = ('תשואות דיבידנט', 'תשואה על ההון העצמי')

for col in stocks_data.columns:
    if col in untouched_cols:
        continue
    # Convert every cell of the column, then write the column back.
    converted = stocks_data[col].apply(format_percent_and_float)
    if col in rescaled_cols:
        converted = converted.apply(lambda value: float(value / 100))
    stocks_data[col] = converted

In [11]:
# Write the frame to disk twice with identical settings: once under the dated
# file name and once under the fixed "latest" file name.
csv_options = dict(sep=';', na_rep='NaN', index=False, encoding='UTF-8')
stocks_data.to_csv(stocks_data_fname_cur, **csv_options)
stocks_data.to_csv(stocks_data_fname_lastest, **csv_options)

In [None]:
import shapefile
import matplotlib.pyplot as plt

# Open the shapefile. The path is absolute and specific to one machine.
# NOTE(review): judging by the file name this holds CHE admin-level-1
# boundaries -- confirm.
shp_path = '/media/yarden/Data/Python_env/Mensa Data vis R script/CHE_adm_shp/CHE_adm1.shp'
sf = shapefile.Reader(shp_path)

In [None]:
# Clear the current pyplot axes.
plt.cla()

In [None]:
# Draw the outline of every shape in `sf`, one line per shape part.
ax = plt.axes()  # add the axes
ax.set_xlim([5.5, 11])
ax.set_ylim([45.5, 48])
ax.set_aspect('equal')

for shape in sf.iterShapes():
    # shape.parts holds the start index of each part within shape.points.
    # Appending the total point count gives [start, stop) bounds for every
    # part. A single-part shape is simply the one-segment case, and a shape
    # without parts produces no segments.
    bounds = list(shape.parts) + [len(shape.points)]
    for start, stop in zip(bounds[:-1], bounds[1:]):
        segment = shape.points[start:stop]
        x_lon = [point[0] for point in segment]
        y_lat = [point[1] for point in segment]
        ax.plot(x_lon, y_lat)

plt.show()