# Python Ep 03: Herramientas de control de flujo

## Creación de funciones de usuario

### Ejemplo

Se desea construir una función que cuente las ocurrencias de cada elemento en una columna de una tabla.

In [None]:
#
# Download the table data
#
tweets_url = (
    "https://raw.githubusercontent.com/jdvelasq/datalabs/master/datasets/tweets.csv"
)
# IPython shell escape: the URL is interpolated into a wget call that
# targets /tmp/, which is where the CSV is read from below
!wget --quiet {tweets_url} -P /tmp/

#
# Load the data into a table
#
import pandas as pd

# "df" stands for DataFrame
tweets_df = pd.read_csv("/tmp/tweets.csv")

# Print the names of all the columns, in sorted order
for column in sorted(tweets_df.columns):
    print(column)

contributors
coordinates
created_at
entities
extended_entities
favorite_count
favorited
filter_level
geo
id
id_str
in_reply_to_screen_name
in_reply_to_status_id
in_reply_to_status_id_str
in_reply_to_user_id
in_reply_to_user_id_str
is_quote_status
lang
place
possibly_sensitive
quoted_status
quoted_status_id
quoted_status_id_str
retweet_count
retweeted
retweeted_status
source
text
timestamp_ms
truncated
user


In [None]:
from pandas.core.frame import DataFrame as DataFrame

In [None]:
# Show the concrete class of the loaded table
display(type(tweets_df))

# Compare it with the DataFrame name imported from pandas.core.frame
display(type(tweets_df) == DataFrame)

# Same comparison, reaching the class through the "pd" alias
display(type(tweets_df) == pd.core.frame.DataFrame)

pandas.core.frame.DataFrame

True

True

In [None]:
# Preview the first rows of the table
tweets_df.head()

Unnamed: 0,contributors,coordinates,created_at,entities,extended_entities,favorite_count,favorited,filter_level,geo,id,...,quoted_status_id,quoted_status_id_str,retweet_count,retweeted,retweeted_status,source,text,timestamp_ms,truncated,user
0,,,Tue Mar 29 23:40:17 +0000 2016,"{'hashtags': [], 'user_mentions': [{'screen_na...","{'media': [{'sizes': {'large': {'w': 1024, 'h'...",0,False,low,,714960401759387648,...,,,0,False,"{'retweeted': False, 'text': "".@krollbondratin...","<a href=""http://twitter.com"" rel=""nofollow"">Tw...",RT @bpolitics: .@krollbondrating's Christopher...,1459294817758,False,"{'utc_offset': 3600, 'profile_image_url_https'..."
1,,,Tue Mar 29 23:40:17 +0000 2016,"{'hashtags': [{'text': 'cruzsexscandal', 'indi...","{'media': [{'sizes': {'large': {'w': 500, 'h':...",0,False,low,,714960401977319424,...,,,0,False,"{'retweeted': False, 'text': '@dmartosko Cruz ...","<a href=""http://twitter.com"" rel=""nofollow"">Tw...",RT @HeidiAlpine: @dmartosko Cruz video found.....,1459294817810,False,"{'utc_offset': None, 'profile_image_url_https'..."
2,,,Tue Mar 29 23:40:17 +0000 2016,"{'hashtags': [], 'user_mentions': [], 'symbols...",,0,False,low,,714960402426236928,...,,,0,False,,"<a href=""http://www.facebook.com/twitter"" rel=...",Njihuni me Zonjën Trump !!! | Ekskluzive https...,1459294817917,False,"{'utc_offset': 7200, 'profile_image_url_https'..."
3,,,Tue Mar 29 23:40:17 +0000 2016,"{'hashtags': [], 'user_mentions': [], 'symbols...",,0,False,low,,714960402367561730,...,7.149239e+17,7.149239e+17,0,False,,"<a href=""http://twitter.com/download/android"" ...",Your an idiot she shouldn't have tried to grab...,1459294817903,False,"{'utc_offset': None, 'profile_image_url_https'..."
4,,,Tue Mar 29 23:40:17 +0000 2016,"{'hashtags': [], 'user_mentions': [{'screen_na...",,0,False,low,,714960402149416960,...,,,0,False,"{'retweeted': False, 'text': 'The anti-America...","<a href=""http://twitter.com/download/iphone"" r...",RT @AlanLohner: The anti-American D.C. elites ...,1459294817851,False,"{'utc_offset': -18000, 'profile_image_url_http..."


In [None]:
#
# The goal is to analyze the retweets.
# They start with "RT" and are located in the "text" column
#

# Inspect the elements of the "text" column
tweets_df.text

0     RT @bpolitics: .@krollbondrating's Christopher...
1     RT @HeidiAlpine: @dmartosko Cruz video found.....
2     Njihuni me Zonjën Trump !!! | Ekskluzive https...
3     Your an idiot she shouldn't have tried to grab...
4     RT @AlanLohner: The anti-American D.C. elites ...
                            ...                        
95    RT @claytoncubitt: Stop asking Bernie supporte...
96    Kasich is gonna fuck this up for Ted Cruz  htt...
97    RT @akaMaude13: Seriously can't make this up. ...
98    Kasich is gonna fuck this up for Ted Cruz  htt...
99    @marklevinshow try reporting this truth. https...
Name: text, Length: 100, dtype: object

In [None]:
# An anonymous function is applied to each element "x" of the "text" column.
# The function handed to filter returns a boolean;
# only the elements for which it is true end up in the new 'result' object
result = filter(lambda x: x[:2] == "RT", tweets_df.text)

print("Tipo de dato de la variable 'result':")
display(type(result))
print()

# Build a list from the filter object
res_list = list(result)

# Show the first 5 elements of the list
res_list[:5]

Tipo de dato de la variable 'result':


filter




["RT @bpolitics: .@krollbondrating's Christopher Whalen says Clinton is the weakest Dem candidate in 50 years https://t.co/pLk7rvoRSn https:/…",
 'RT @HeidiAlpine: @dmartosko Cruz video found.....racing from the scene.... #cruzsexscandal https://t.co/zuAPZfQDk3',
 'RT @AlanLohner: The anti-American D.C. elites despise Trump for his America-first foreign policy. Trump threatens their gravy train. https:…',
 'RT @BIackPplTweets: Young Donald trump meets his neighbor  https://t.co/RFlu17Z1eE',
 'RT @trumpresearch: @WaitingInBagdad @thehill Trump supporters have selective amnisia.']

In [None]:
def count_entries(df: DataFrame, col_name: str = "lang"):
    """Count how many times each distinct value appears in a column.

    Args:
        df: Table to inspect.
        col_name: Label of the column whose entries are counted.

    Returns:
        A dictionary mapping each value found in ``df[col_name]`` to its
        number of occurrences, keyed in order of first appearance. If the
        column does not exist, a message is printed and ``None`` is returned.
    """
    # Selecting a missing column label raises KeyError (not ValueError), so
    # that is the exception handled here. Only the lookup is guarded, so
    # unrelated errors raised while counting are not masked.
    try:
        col = df[col_name]
    except KeyError:
        print(f"La columna {col_name} no existe!")
        return None

    cols_count = {}
    for entry in col:
        # dict.get supplies 0 the first time a value is seen
        cols_count[entry] = cols_count.get(entry, 0) + 1

    return cols_count

In [None]:
# Count the occurrences of each distinct value in the "text" column
counted_entries = count_entries(tweets_df, "text")
print(counted_entries)

{"RT @bpolitics: .@krollbondrating's Christopher Whalen says Clinton is the weakest Dem candidate in 50 years https://t.co/pLk7rvoRSn https:/…": 1, 'RT @HeidiAlpine: @dmartosko Cruz video found.....racing from the scene.... #cruzsexscandal https://t.co/zuAPZfQDk3': 1, 'Njihuni me Zonjën Trump !!! | Ekskluzive https://t.co/4KmsQi47VD': 1, "Your an idiot she shouldn't have tried to grab trump after the fact she's an idiot https://t.co/lpASyeNVpG": 2, 'RT @AlanLohner: The anti-American D.C. elites despise Trump for his America-first foreign policy. Trump threatens their gravy train. https:…': 1, 'RT @BIackPplTweets: Young Donald trump meets his neighbor  https://t.co/RFlu17Z1eE': 1, 'RT @trumpresearch: @WaitingInBagdad @thehill Trump supporters have selective amnisia.': 2, 'RT @HouseCracka: 29,000+ PEOPLE WATCHING TRUMP LIVE ON ONE STREAM!!!\n\nhttps://t.co/7QCFz9ehNe': 1, 'RT @urfavandtrump: RT for Brendon Urie\nFav for Donald Trump https://t.co/PZ5vS94lOg': 2, 'RT @trapgrampa: This is h

## Ámbito de las variables

In [None]:
# Variable scope
int_var = 5

def my_function(int_var):
    # The parameter shares its name with the module-level variable, so this
    # assignment rebinds only the local name
    int_var = 3
    print("inside:", int_var)

my_function(int_var)
# The module-level int_var still holds 5 here
print("outside:", int_var)

inside: 3
outside: 5


In [None]:
# The variable is defined at module (global) level
global_var = 0


def user_function():
    """Change the value of the global variable."""
    # "global" makes the augmented assignment below rebind the module-level name
    global global_var
    global_var += 1


# Show the value before and after the call
display(global_var)
user_function()
display(global_var)

0

1

In [None]:
# Checking variables
int_var = 5

def my_function(a):
    # Without a "global" declaration, this assignment creates a local int_var
    int_var = a
    print("inside:", int_var)

my_function(3)
# The module-level int_var is unchanged by the call
print("outside:", int_var)

inside: 3
outside: 5


In [None]:
# Using global
int_var = 5

def my_function(a):
    # Declared global, so the assignment rebinds the module-level int_var
    global int_var
    int_var = a
    print("inside:", int_var)

my_function(3)
# The module-level int_var now holds the value passed to the call
print("outside:", int_var)

inside: 3
outside: 3
