Skip to content

Commit

Permalink
Merge pull request #1 from masterdatascience-UIMP-UC/master
Browse files Browse the repository at this point in the history
06, 07 - Numpy y Pandas
  • Loading branch information
joseney19 committed Oct 11, 2018
2 parents bf543c0 + 64c5838 commit 666fc2f
Show file tree
Hide file tree
Showing 7 changed files with 1,989 additions and 13 deletions.
54 changes: 41 additions & 13 deletions 03.2-ejercicio.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -102,7 +102,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -120,11 +120,11 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"def words_to_frequencies(lyrics)\n",
"def words_to_frequencies(lyrics):\n",
" \"\"\"\n",
" Convert words into frequencies. Return a dictionary whose keys are the\n",
" words with the frequency as the value\n",
Expand All @@ -145,38 +145,39 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"def most_common_words(frequencies)\n",
"def most_common_words(frequencies):\n",
" \"\"\"\n",
" Return a tuple containing:\n",
" * The number of occurrences of a word in the first tuple element\n",
" * A list containing the words with that frequency\n",
" \"\"\"\n",
" values = frequencies.values()\n",
" max = max(values)\n",
" best = max(values)\n",
" \n",
" words = []\n",
" for word, score in frequencies.items():\n",
" if score == max(value):\n",
" if score == best:\n",
" words.append(word)\n",
" return (max, words)"
" return (best, words)"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"def get_more_often_user_words(threshold=10, frequencies):\n",
"def get_more_often_user_words(frequencies, threshold=10):\n",
" \"\"\"\n",
" Return a list of the words that are used more often, above\n",
" the *optional* threshold. If no threshold is passed, use 10.\n",
" \"\"\"\n",
" \n",
" result = []\n",
" while True:\n",
" score = most_common_words(frequencies)\n",
" if score[0] < threshold:\n",
Expand All @@ -190,9 +191,22 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 17,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Palabras más frecuentes con más de 10 apariciones.\n",
"[(22, ['que']), (15, ['tu', 'a']), (12, ['quiero']), (10, ['despacito', 'sube'])]\n",
"\n",
"\n",
"Palabras más frecuentes con más de 10 apariciones.\n",
"[(9, ['y', 'de', 'favoritos']), (8, ['te', 'tus', 'pasito', 'poquito']), (6, ['oh', 'el', 'mi']), (5, ['no', 'un', 'yo', 'vamos'])]\n"
]
}
],
"source": [
"words_clean = split_into_words(lyrics)\n",
"\n",
Expand All @@ -204,6 +218,20 @@
"print(\"Palabras más frecuentes con más de 10 apariciones.\")\n",
"print(get_more_often_user_words(freqs, threshold=5))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down

0 comments on commit 666fc2f

Please sign in to comment.