<img width="8%" alt="Naas.png" src="https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/.github/assets/logos/Naas.png" style="border-radius: 15%">

# Data

## Input

### Import libraries

In [None]:
import pickle
import os
from unidecode import unidecode
from difflib import SequenceMatcher
import re
from datetime import datetime, timedelta
import pytz
import naas
from naas_drivers import linkedin
import naas_data_product

## Model

### Pickle functions

#### Pickle dump

In [None]:
def pdump(
    output_dir,
    object_to_dump,
    file_to_dump_to,
):
    """Serialize ``object_to_dump`` to ``<output_dir>/<file_to_dump_to>.pickle``.

    Args:
        output_dir: Directory receiving the pickle file; created (including
            missing parents) when it does not exist yet.
        object_to_dump: Any picklable Python object.
        file_to_dump_to: File name without the ``.pickle`` extension.
    """
    # Ensure the destination folder exists before opening the file for writing.
    os.makedirs(output_dir, exist_ok=True)
    pickle_name = '{}.pickle'.format(file_to_dump_to)
    destination = os.path.join(output_dir, pickle_name)
    with open(destination, 'wb') as handle:
        pickle.dump(object_to_dump, handle)

#### Pickle load

In [None]:
def pload(
    output_dir,
    file_to_load_from
):
    """Load the object stored in ``<output_dir>/<file_to_load_from>.pickle``.

    Loading is best-effort: a missing, unreadable or corrupt file yields
    ``None`` instead of raising, so callers can treat ``None`` as "no cached
    value available".

    Args:
        output_dir: Directory containing the pickle file.
        file_to_load_from: File name without the ``.pickle`` extension.

    Returns:
        The unpickled object, or ``None`` when it could not be loaded.
    """
    file_path = os.path.join(output_dir, f'{file_to_load_from}.pickle')
    # NOTE(security): unpickling executes arbitrary code embedded in the file;
    # only point this at directories whose content is trusted.
    try:
        with open(file_path, 'rb') as file:
            return pickle.load(file)
    except Exception:
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still propagate to the caller.
        return None

### Sequence Matcher from df

In [None]:
def remove_accent(string):
    """Return ``string`` passed through ``unidecode`` and lower-cased.

    Used to normalize text before fuzzy comparison so that accents and
    letter case do not affect the result.
    """
    return unidecode(string).lower()

def are_identical(string1, string2):
    """Tell whether two strings are near-identical after normalization.

    Both inputs are normalized with ``remove_accent`` and then compared with
    ``difflib.SequenceMatcher``.

    Returns:
        ``True`` when the similarity ratio is strictly greater than 0.9,
        ``False`` otherwise (so small differences are tolerated).
    """
    normalized_first = remove_accent(string1)
    normalized_second = remove_accent(string2)
    ratio = SequenceMatcher(None, normalized_first, normalized_second).ratio()
    return ratio > 0.9
    
def find_crm_match(
    df,
    col_crm,
    value
):
    """Check whether ``value`` fuzzily matches any distinct entry of a column.

    Args:
        df: Object indexable by ``col_crm`` whose result exposes ``.unique()``
            (presumably a pandas DataFrame — confirm against callers).
        col_crm: Name of the column to scan.
        value: String to look for.

    Returns:
        ``True`` as soon as one unique column value (converted to ``str``)
        satisfies ``are_identical``; ``False`` when none does.
    """
    # ``any`` stops at the first hit, like the explicit loop-and-break form.
    return any(
        are_identical(value, str(candidate))
        for candidate in df[col_crm].unique()
    )

### Remove emojis

In [None]:
def remove_emojis(text):
    """Strip emoji and symbol characters from ``text`` and trim whitespace.

    Every run of characters falling in the ranges below is deleted, then
    leading/trailing whitespace is removed from what is left.

    NOTE(review): the ``U+24C2-U+1F251`` range is very wide and also spans
    non-emoji scripts (e.g. CJK ideographs), so such text is removed too.
    """
    symbol_ranges = (
        u"\U0001F600-\U0001F64F"  # emoticons
        u"\U0001F300-\U0001F5FF"  # symbols & pictographs
        u"\U0001F680-\U0001F6FF"  # transport & map symbols
        u"\U0001F1E0-\U0001F1FF"  # flags (regional indicators)
        u"\U00002500-\U00002BEF"  # box drawing through misc symbols and arrows
        u"\U00002702-\U000027B0"  # dingbats
        u"\U00002702-\U000027B0"  # (same range listed twice)
        u"\U000024C2-\U0001F251"  # circled M up to U+1F251 (very wide span)
        u"\U0001f926-\U0001f937"  # gesture pictographs
        u"\U00010000-\U0010ffff"  # every supplementary-plane code point
        u"\u2640-\u2642"  # gender signs
        u"\u2600-\u2B55"  # miscellaneous symbols up to U+2B55
        u"\u200d"  # zero width joiner
        u"\u23cf"  # eject symbol
        u"\u23e9"  # fast-forward symbol
        u"\u231a"  # watch
        u"\ufe0f"  # variation selector-16
        u"\u3030"  # wavy dash
    )
    matcher = re.compile("[" + symbol_ranges + "]+", flags=re.UNICODE)
    cleaned = matcher.sub(r'', text)
    return cleaned.strip()

### Format value

In [None]:
def format_number(num):
    """Format the absolute value of ``num`` with spaces as thousands separators.

    The value is rounded to zero decimals and its sign is dropped, e.g.
    ``-1234567`` becomes ``"1 234 567"``.
    """
    grouped = f"{abs(num):,.0f}"
    return grouped.replace(",", " ")

### Get dict from df

In [None]:
def get_dict_from_df(
    df,
    column_name,
    key,
    file,
    output_dir
):
    """Return a ``{key: column_name}`` mapping built from ``df``, cached on disk.

    Args:
        df: Table to read from (presumably a pandas DataFrame — it must
            expose ``columns``, boolean-mask indexing and ``set_index``).
        column_name: Column providing the dictionary values.
        key: Column providing the dictionary keys.
        file: Cache file name (without extension) passed to ``pload``/``pdump``.
        output_dir: Directory holding the cache file.

    Returns:
        An empty dict when ``column_name`` is not a column of ``df``;
        otherwise the cached object if ``pload`` finds one, else a freshly
        built mapping that skips rows whose value is ``"TBD"`` or ``"NA"``.
    """
    if column_name not in df.columns:
        return {}

    # A previously saved mapping wins over recomputing from ``df``.
    cached = pload(output_dir, file)
    if cached is not None:
        return cached

    is_placeholder = df[column_name].isin(["TBD", "NA"])
    mapping = df[~is_placeholder].set_index(key)[column_name].to_dict()
    pdump(output_dir, mapping, file)
    return mapping

### Get LinkedIn data

In [None]:
def get_linkedin_data(
    linkedin_url,
    linkedin_dir,
    data_type="top_card",
    li_at=None,
    JSESSIONID=None
):
    """Fetch one kind of LinkedIn profile data, using a pickle cache on disk.

    The result is cached in ``linkedin_dir`` under
    ``<profile id>_linkedin_<data_type>.pickle``; when that cache exists the
    LinkedIn API is not called.

    Args:
        linkedin_url: Profile URL containing ``/in/<profile id>``.
        linkedin_dir: Directory used for the pickle cache.
        data_type: One of ``"top_card"``, ``"identity"``, ``"network"``,
            ``"contact"`` or ``"resume"``. Any other value performs no fetch
            and yields ``None``.
        li_at: LinkedIn ``li_at`` cookie; read from the naas secret
            ``LINKEDIN_LI_AT`` when falsy.
        JSESSIONID: LinkedIn ``JSESSIONID`` cookie; read from the naas secret
            ``LINKEDIN_JSESSIONID`` when falsy.

    Returns:
        The cached or freshly fetched object (presumably a pandas DataFrame
        returned by ``naas_drivers`` — confirm), or an empty DataFrame when
        fetching or caching raised an exception.

    Raises:
        IndexError: If ``linkedin_url`` does not contain ``/in/``.
    """
    # Imported here because the error path below needs pandas and the module
    # itself does not import it.
    import pandas as pd

    # Get secrets
    if not li_at:
        li_at = naas.secret.get("LINKEDIN_LI_AT")
    if not JSESSIONID:
        JSESSIONID = naas.secret.get("LINKEDIN_JSESSIONID")

    # Maps each supported data type to the profile method that fetches it.
    profile_getters = {
        "top_card": "get_top_card",
        "identity": "get_identity",
        "network": "get_network",
        "contact": "get_contact",
        "resume": "get_resume",
    }

    # Create ID
    linkedin_id = linkedin_url.split("/in/")[1].split("/")[0]
    cache_name = f"{linkedin_id}_linkedin_{data_type}"
    df = pload(linkedin_dir, cache_name)
    if df is None:
        try:
            getter_name = profile_getters.get(data_type)
            if getter_name is not None:
                profile = linkedin.connect(li_at, JSESSIONID).profile
                df = getattr(profile, getter_name)(linkedin_url)
            pdump(linkedin_dir, df, cache_name)
        except Exception as e:
            # Best-effort: report the failure and hand back an empty frame.
            print(e)
            df = pd.DataFrame()
    return df

## Output

### Constants

In [None]:
# Timezone: use the name stored for entity "0" when available, else Paris.
tz = pload(os.path.join(naas_data_product.OUTPUTS_PATH, "entities", "0"), "timezone")
if tz is None:
    tz = "Europe/Paris"
TIMEZONE = pytz.timezone(tz)

# Scenario values
# Take a single timestamp so TW and LW are always derived from the same
# instant (separate now() calls could straddle a day/week boundary).
_now = datetime.now(TIMEZONE)
# "W<week number, Monday as first day>-<year>" for the current week.
TW = _now.strftime("W%W-%Y")
# Same label for the Monday of the previous week.
LW = (_now - timedelta(days=_now.weekday() + 7)).strftime("W%W-%Y")

# Mapping colors: one color per scenario label.
MAPPING_COLORS = {
    TW: "#48DD82",
    LW: "#FFFDA2",
}

# Logos: trend arrow images.
arrow_up = "https://upload.wikimedia.org/wikipedia/commons/thumb/c/c0/Eo_circle_green_arrow-up.svg/2048px-Eo_circle_green_arrow-up.svg.png"
arrow_down = "https://upload.wikimedia.org/wikipedia/commons/thumb/b/b7/Eo_circle_red_arrow-down.svg/2048px-Eo_circle_red_arrow-down.svg.png"
arrow_right = "https://upload.wikimedia.org/wikipedia/commons/thumb/4/42/Eo_circle_orange_arrow-right.svg/2048px-Eo_circle_orange_arrow-right.svg.png"