blog/2020/recipes-generation/index.html

<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta http-equiv="x-ua-compatible" content="ie=edge"/><meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"/><style data-href="/styles.f4dbc894744c3c869f21.css" id="gatsby-global-css">@import url(https://fonts.googleapis.com/css2?family=Roboto:ital,wght@0,100;0,300;0,400;0,500;0,700;0,900;1,100;1,300;1,400;1,500;1,700;1,900&display=swap);.post--2021--binary-floating-point--bit-button:hover{box-shadow:0 0 5px 1px rgba(0,0,0,.2);transition:box-shadow .2s ease-in-out}input:checked~.toggle__dot{transform:translateX(100%)}input:checked~.toggle__line{background-color:#48bb78}.post--2021--water-line--gyro-cube .gyro-cube-container{height:400px;display:flex;justify-content:center;align-items:center;perspective:800px;perspective-origin:50%}.post--2021--water-line--gyro-cube .gyro-cube{position:relative;width:200px;height:200px;transform-style:preserve-3d}.post--2021--water-line--gyro-cube .gyro-cube-side{position:absolute;width:100%;height:100%;opacity:.8;border:2px solid #fff;display:flex;justify-content:center;align-items:center;color:#fff;font-weight:700;font-size:100px}.post--2021--water-line--gyro-cube .gyro-cube-front{background-color:#d50000;transform:translateZ(100px)}.post--2021--water-line--gyro-cube .gyro-cube-back{background-color:#a0f;transform:translateZ(-100px)}.post--2021--water-line--gyro-cube .gyro-cube-left{background-color:#304ffe;transform:translateX(100px) rotateY(90deg)}.post--2021--water-line--gyro-cube .gyro-cube-right{background-color:#0091ea;transform:translateX(-100px) rotateY(90deg)}.post--2021--water-line--gyro-cube .gyro-cube-top{background-color:#00bfa5;transform:translateY(-100px) rotateX(90deg)}.post--2021--water-line--gyro-cube .gyro-cube-bottom{background-color:#64dd17;transform:translateY(100px) rotateX(90deg)}.custom-fade-in-opacity{opacity:1;-webkit-animation-name:customFadeInOpacity;animation-name:customFadeInOpacity;-webkit-animation-iteration-count:1;animation-iteration-count:1;-webkit-animation-timing-function:ease-out;animation-timing-function:ease-out;-webkit-animation-duration:.8s;animation-duration:.8s}@-webkit-keyframes customFadeInOpacity{0%{opacity:0}to{opacity:1}}@keyframes customFadeInOpacity{0%{opacity:0}to{opacity:1}}.prose blockquote p:first-of-type:before,.prose blockquote p:last-of-type:after,.prose code:after,.prose code:before{content:""!important}.prose blockquote{font-weight:400!important}.prose li code.language-text,.prose p code.language-text,.prose td code.language-text,.prose th code.language-text,.prose tr code.language-text{padding:2px 5px 1px;font-weight:400}.prose p>img{margin:auto}.prose h1>a.gatsby-remark-autolink-header-anchor,.prose h2>a.gatsby-remark-autolink-header-anchor,.prose h3>a.gatsby-remark-autolink-header-anchor,.prose h4>a.gatsby-remark-autolink-header-anchor,.prose h5>a.gatsby-remark-autolink-header-anchor{visibility:hidden;display:inline-block;margin-left:10px}.prose h1:hover>a.gatsby-remark-autolink-header-anchor,.prose h2:hover>a.gatsby-remark-autolink-header-anchor,.prose h3:hover>a.gatsby-remark-autolink-header-anchor,.prose h4:hover>a.gatsby-remark-autolink-header-anchor,.prose h5:hover>a.gatsby-remark-autolink-header-anchor{visibility:visible}

/* ! tailwindcss v2.1.2 | MIT License | https://tailwindcss.com */

/*! modern-normalize v1.0.0 | MIT License | https://github.com/sindresorhus/modern-normalize */:root{-moz-tab-size:4;-o-tab-size:4;tab-size:4}html{line-height:1.15;-webkit-text-size-adjust:100%}body{margin:0;font-family:system-ui,-apple-system,Segoe UI,Roboto,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji}hr{height:0;color:inherit}abbr[title]{-webkit-text-decoration:underline dotted;text-decoration:underline dotted}b,strong{font-weight:bolder}code,kbd,pre,samp{font-family:ui-monospace,SFMono-Regular,Consolas,Liberation Mono,Menlo,monospace;font-size:1em}small{font-size:80%}sub,sup{font-size:75%;line-height:0;position:relative;vertical-align:baseline}sub{bottom:-.25em}sup{top:-.5em}table{text-indent:0;border-color:inherit}button,input,optgroup,select,textarea{font-family:inherit;font-size:100%;line-height:1.15;margin:0}button,select{text-transform:none}[type=button],[type=reset],[type=submit],button{-webkit-appearance:button}legend{padding:0}progress{vertical-align:baseline}[type=search]{-webkit-appearance:textfield;outline-offset:-2px}summary{display:list-item}blockquote,dd,dl,figure,h1,h2,h3,h4,h5,h6,hr,p,pre{margin:0}button{background-color:transparent;background-image:none}button:focus{outline:1px dotted;outline:5px auto -webkit-focus-ring-color}fieldset,ol,ul{margin:0;padding:0}ol,ul{list-style:none}html{font-family:Roboto,ui-sans-serif,system-ui,-apple-system,BlinkMacSystemFont,Segoe UI,Helvetica Neue,Arial,Noto Sans,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol,Noto Color Emoji;line-height:1.5}body{font-family:inherit;line-height:inherit}*,:after,:before{box-sizing:border-box;border:0 solid #e5e7eb}hr{border-top-width:1px}img{border-style:solid}textarea{resize:vertical}input::-moz-placeholder,textarea::-moz-placeholder{opacity:1;color:#9ca3af}input:-ms-input-placeholder,textarea:-ms-input-placeholder{opacity:1;color:#9ca3af}input::placeholder,textarea::placeholder{opacity:1;color:#9ca3af}button{cursor:pointer}table{border-collapse:collapse}h1,h2,h3,h4,h5,h6{font-size:inherit;font-weight:inherit}a{color:inherit;text-decoration:inherit}button,input,optgroup,select,textarea{padding:0;line-height:inherit;color:inherit}code,kbd,pre,samp{font-family:ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,monospace}audio,canvas,embed,iframe,img,object,svg,video{display:block;vertical-align:middle}img,video{max-width:100%;height:auto}.container{width:100%}@media (min-width:640px){.container{max-width:640px}}@media (min-width:768px){.container{max-width:768px}}@media (min-width:1024px){.container{max-width:1024px}}@media (min-width:1280px){.container{max-width:1280px}}@media (min-width:1536px){.container{max-width:1536px}}.prose{color:#374151;max-width:65ch}.prose [class~=lead]{color:#4b5563;font-size:1.25em;line-height:1.6;margin-top:1.2em;margin-bottom:1.2em}.prose a{color:#dc2626;text-decoration:underline;font-weight:500}.prose a:hover{color:#ef4444}.prose strong{color:#111827;font-weight:600}.prose ol[type=A]{--list-counter-style:upper-alpha}.prose ol[type=a]{--list-counter-style:lower-alpha}.prose ol[type=I]{--list-counter-style:upper-roman}.prose ol[type=i]{--list-counter-style:lower-roman}.prose ol[type="1"]{--list-counter-style:decimal}.prose ol>li{position:relative;padding-left:1.75em}.prose ol>li:before{content:counter(list-item,var(--list-counter-style,decimal)) ".";position:absolute;font-weight:400;color:#6b7280;left:0}.prose ul>li{position:relative;padding-left:1.75em}.prose ul>li:before{content:"";position:absolute;background-color:#d1d5db;border-radius:50%;width:.375em;height:.375em;top:.6875em;left:.25em}.prose hr{border-color:#e5e7eb;border-top-width:1px;margin-top:3em;margin-bottom:3em}.prose blockquote{font-weight:500;font-style:italic;color:#111827;border-left-width:.25rem;border-left-color:#e5e7eb;quotes:"\201C""\201D""\2018""\2019";margin-top:1.6em;margin-bottom:1.6em;padding-left:1em}.prose blockquote p:first-of-type:before{content:open-quote}.prose blockquote p:last-of-type:after{content:close-quote}.prose h1{color:#111827;font-weight:800;font-size:2.25em;margin-top:0;margin-bottom:.8888889em;line-height:1.1111111}.prose h2{color:#111827;font-weight:700;font-size:1.5em;margin-top:2em;margin-bottom:1em;line-height:1.3333333}.prose h3{font-size:1.25em;margin-top:1.6em;margin-bottom:.6em;line-height:1.6}.prose h3,.prose h4{color:#111827;font-weight:600}.prose h4{margin-top:1.5em;margin-bottom:.5em;line-height:1.5}.prose figure figcaption{color:#6b7280;font-size:.875em;line-height:1.4285714;margin-top:.8571429em}.prose code{color:#111827;font-weight:600;font-size:.875em}.prose code:after,.prose code:before{content:"`"}.prose a code{color:#111827}.prose pre{color:#e5e7eb;background-color:#1f2937;overflow-x:auto;font-size:.875em;line-height:1.7142857;margin-top:1.7142857em;margin-bottom:1.7142857em;border-radius:.375rem;padding:.8571429em 1.1428571em}.prose pre code{background-color:transparent;border-width:0;border-radius:0;padding:0;font-weight:400;color:inherit;font-size:inherit;font-family:inherit;line-height:inherit}.prose pre code:after,.prose pre code:before{content:none}.prose table{width:100%;table-layout:auto;text-align:left;margin-top:2em;margin-bottom:2em;font-size:.875em;line-height:1.7142857}.prose thead{color:#111827;font-weight:600;border-bottom-width:1px;border-bottom-color:#d1d5db}.prose thead th{vertical-align:bottom;padding-right:.5714286em;padding-bottom:.5714286em;padding-left:.5714286em}.prose tbody tr{border-bottom-width:1px;border-bottom-color:#e5e7eb}.prose tbody tr:last-child{border-bottom-width:0}.prose tbody td{vertical-align:top;padding:.5714286em}.prose{font-size:1rem;line-height:1.75}.prose p{margin-top:1.25em;margin-bottom:1.25em}.prose figure,.prose img,.prose video{margin-top:2em;margin-bottom:2em}.prose figure>*{margin-top:0;margin-bottom:0}.prose h2 code{font-size:.875em}.prose h3 code{font-size:.9em}.prose ol,.prose ul{margin-top:1.25em;margin-bottom:1.25em}.prose li{margin-top:.5em;margin-bottom:.5em}.prose>ul>li p{margin-top:.75em;margin-bottom:.75em}.prose>ul>li>:first-child{margin-top:1.25em}.prose>ul>li>:last-child{margin-bottom:1.25em}.prose>ol>li>:first-child{margin-top:1.25em}.prose>ol>li>:last-child{margin-bottom:1.25em}.prose ol ol,.prose ol ul,.prose ul ol,.prose ul ul{margin-top:.75em;margin-bottom:.75em}.prose h2+*,.prose h3+*,.prose h4+*,.prose hr+*{margin-top:0}.prose thead th:first-child{padding-left:0}.prose thead th:last-child{padding-right:0}.prose tbody td:first-child{padding-left:0}.prose tbody td:last-child{padding-right:0}.prose>:first-child{margin-top:0}.prose>:last-child{margin-bottom:0}.prose-sm{font-size:.875rem;line-height:1.7142857}.prose-sm p{margin-top:1.1428571em;margin-bottom:1.1428571em}.prose-sm [class~=lead]{font-size:1.2857143em;line-height:1.5555556;margin-top:.8888889em;margin-bottom:.8888889em}.prose-sm blockquote{margin-top:1.3333333em;margin-bottom:1.3333333em;padding-left:1.1111111em}.prose-sm h1{font-size:2.1428571em;margin-top:0;margin-bottom:.8em;line-height:1.2}.prose-sm h2{font-size:1.4285714em;margin-top:1.6em;margin-bottom:.8em;line-height:1.4}.prose-sm h3{font-size:1.2857143em;margin-top:1.5555556em;margin-bottom:.4444444em;line-height:1.5555556}.prose-sm h4{margin-top:1.4285714em;margin-bottom:.5714286em;line-height:1.4285714}.prose-sm figure,.prose-sm img,.prose-sm video{margin-top:1.7142857em;margin-bottom:1.7142857em}.prose-sm figure>*{margin-top:0;margin-bottom:0}.prose-sm figure figcaption{font-size:.8571429em;line-height:1.3333333;margin-top:.6666667em}.prose-sm code{font-size:.8571429em}.prose-sm h2 code{font-size:.9em}.prose-sm h3 code{font-size:.8888889em}.prose-sm pre{font-size:.8571429em;line-height:1.6666667;margin-top:1.6666667em;margin-bottom:1.6666667em;border-radius:.25rem;padding:.6666667em 1em}.prose-sm ol,.prose-sm ul{margin-top:1.1428571em;margin-bottom:1.1428571em}.prose-sm li{margin-top:.2857143em;margin-bottom:.2857143em}.prose-sm ol>li{padding-left:1.5714286em}.prose-sm ol>li:before{left:0}.prose-sm ul>li{padding-left:1.5714286em}.prose-sm ul>li:before{height:.3571429em;width:.3571429em;top:.67857em;left:.2142857em}.prose-sm>ul>li p{margin-top:.5714286em;margin-bottom:.5714286em}.prose-sm>ul>li>:first-child{margin-top:1.1428571em}.prose-sm>ul>li>:last-child{margin-bottom:1.1428571em}.prose-sm>ol>li>:first-child{margin-top:1.1428571em}.prose-sm>ol>li>:last-child{margin-bottom:1.1428571em}.prose-sm ol ol,.prose-sm ol ul,.prose-sm ul ol,.prose-sm ul ul{margin-top:.5714286em;margin-bottom:.5714286em}.prose-sm hr{margin-top:2.8571429em;margin-bottom:2.8571429em}.prose-sm h2+*,.prose-sm h3+*,.prose-sm h4+*,.prose-sm hr+*{margin-top:0}.prose-sm table{font-size:.8571429em;line-height:1.5}.prose-sm thead th{padding-right:1em;padding-bottom:.6666667em;padding-left:1em}.prose-sm thead th:first-child{padding-left:0}.prose-sm thead th:last-child{padding-right:0}.prose-sm tbody td{padding:.6666667em 1em}.prose-sm tbody td:first-child{padding-left:0}.prose-sm tbody td:last-child{padding-right:0}.prose-sm>:first-child{margin-top:0}.prose-sm>:last-child{margin-bottom:0}.prose-red a,.prose-red a code{color:#dc2626}.appearance-none{-webkit-appearance:none;-moz-appearance:none;appearance:none}.bg-black{--tw-bg-opacity:1;background-color:rgba(0,0,0,var(--tw-bg-opacity))}.bg-white{--tw-bg-opacity:1;background-color:rgba(255,255,255,var(--tw-bg-opacity))}.bg-gray-100{--tw-bg-opacity:1;background-color:rgba(243,244,246,var(--tw-bg-opacity))}.bg-gray-200{--tw-bg-opacity:1;background-color:rgba(229,231,235,var(--tw-bg-opacity))}.bg-gray-400{--tw-bg-opacity:1;background-color:rgba(156,163,175,var(--tw-bg-opacity))}.bg-red-100{--tw-bg-opacity:1;background-color:rgba(254,226,226,var(--tw-bg-opacity))}.bg-blue-100{--tw-bg-opacity:1;background-color:rgba(219,234,254,var(--tw-bg-opacity))}.hover\:bg-black:hover{--tw-bg-opacity:1;background-color:rgba(0,0,0,var(--tw-bg-opacity))}.hover\:bg-white:hover{--tw-bg-opacity:1;background-color:rgba(255,255,255,var(--tw-bg-opacity))}.hover\:bg-gray-800:hover{--tw-bg-opacity:1;background-color:rgba(31,41,55,var(--tw-bg-opacity))}.hover\:bg-red-500:hover{--tw-bg-opacity:1;background-color:rgba(239,68,68,var(--tw-bg-opacity))}.bg-cover{background-size:cover}.border-black{--tw-border-opacity:1;border-color:rgba(0,0,0,var(--tw-border-opacity))}.border-white{--tw-border-opacity:1;border-color:rgba(255,255,255,var(--tw-border-opacity))}.border-gray-300{--tw-border-opacity:1;border-color:rgba(209,213,219,var(--tw-border-opacity))}.border-gray-400{--tw-border-opacity:1;border-color:rgba(156,163,175,var(--tw-border-opacity))}.border-red-500{--tw-border-opacity:1;border-color:rgba(239,68,68,var(--tw-border-opacity))}.hover\:border-white:hover{--tw-border-opacity:1;border-color:rgba(255,255,255,var(--tw-border-opacity))}.hover\:border-gray-400:hover{--tw-border-opacity:1;border-color:rgba(156,163,175,var(--tw-border-opacity))}.rounded-sm{border-radius:.125rem}.rounded{border-radius:.25rem}.rounded-md{border-radius:.375rem}.rounded-full{border-radius:9999px}.border-solid{border-style:solid}.border-dashed{border-style:dashed}.border{border-width:1px}.cursor-pointer{cursor:pointer}.cursor-not-allowed{cursor:not-allowed}.block{display:block}.inline-block{display:inline-block}.flex{display:flex}.table{display:table}.grid{display:grid}.hidden{display:none}.flex-row{flex-direction:row}.flex-col{flex-direction:column}.flex-wrap{flex-wrap:wrap}.items-start{align-items:flex-start}.items-center{align-items:center}.items-stretch{align-items:stretch}.self-start{align-self:flex-start}.self-stretch{align-self:stretch}.justify-start{justify-content:flex-start}.justify-end{justify-content:flex-end}.justify-center{justify-content:center}.justify-between{justify-content:space-between}.justify-self-end{justify-self:end}.flex-1{flex:1 1 0%}.font-light{font-weight:300}.font-normal{font-weight:400}.font-medium{font-weight:500}.font-semibold{font-weight:600}.font-bold{font-weight:700}.font-extrabold{font-weight:800}.h-0{height:0}.h-4{height:1rem}.h-5{height:1.25rem}.h-6{height:1.5rem}.h-48{height:12rem}.h-64{height:16rem}.h-96{height:24rem}.h-0\.5{height:.125rem}.h-full{height:100%}.text-xs{font-size:.75rem;line-height:1rem}.text-sm{font-size:.875rem;line-height:1.25rem}.text-xl{font-size:1.25rem;line-height:1.75rem}.text-2xl{font-size:1.5rem;line-height:2rem}.text-3xl{font-size:1.875rem;line-height:2.25rem}.m-auto{margin:auto}.mr-0{margin-right:0}.mb-0{margin-bottom:0}.mr-1{margin-right:.25rem}.mb-1{margin-bottom:.25rem}.ml-1{margin-left:.25rem}.mt-2{margin-top:.5rem}.mr-2{margin-right:.5rem}.mb-2{margin-bottom:.5rem}.ml-2{margin-left:.5rem}.mt-3{margin-top:.75rem}.mr-3{margin-right:.75rem}.mb-3{margin-bottom:.75rem}.ml-3{margin-left:.75rem}.mr-4{margin-right:1rem}.mb-4{margin-bottom:1rem}.ml-4{margin-left:1rem}.mr-5{margin-right:1.25rem}.ml-5{margin-left:1.25rem}.mt-6{margin-top:1.5rem}.mr-6{margin-right:1.5rem}.mb-6{margin-bottom:1.5rem}.mb-12{margin-bottom:3rem}.mt-16{margin-top:4rem}.max-w-md{max-width:28rem}.max-w-screen-xl{max-width:1280px}.overflow-hidden{overflow:hidden}.overflow-scroll{overflow:scroll}.p-6{padding:1.5rem}.p-8{padding:2rem}.py-1{padding-top:.25rem;padding-bottom:.25rem}.px-1{padding-left:.25rem;padding-right:.25rem}.py-2{padding-top:.5rem;padding-bottom:.5rem}.px-2{padding-left:.5rem;padding-right:.5rem}.py-3{padding-top:.75rem;padding-bottom:.75rem}.px-3{padding-left:.75rem;padding-right:.75rem}.px-4{padding-left:1rem;padding-right:1rem}.py-6{padding-top:1.5rem;padding-bottom:1.5rem}.px-6{padding-left:1.5rem;padding-right:1.5rem}.py-12{padding-top:3rem;padding-bottom:3rem}.pb-6{padding-bottom:1.5rem}.static{position:static}.absolute{position:absolute}.relative{position:relative}.inset-y-0{top:0;bottom:0}.left-0{left:0}.resize{resize:both}*{--tw-shadow:0 0 transparent}.shadow-sm{--tw-shadow:0 1px 2px 0 rgba(0,0,0,0.05)}.shadow,.shadow-sm{box-shadow:var(--tw-ring-offset-shadow,0 0 transparent),var(--tw-ring-shadow,0 0 transparent),var(--tw-shadow)}.shadow{--tw-shadow:0 1px 3px 0 rgba(0,0,0,0.1),0 1px 2px 0 rgba(0,0,0,0.06)}.shadow-md{--tw-shadow:0 4px 6px -1px rgba(0,0,0,0.1),0 2px 4px -1px rgba(0,0,0,0.06)}.shadow-inner,.shadow-md{box-shadow:var(--tw-ring-offset-shadow,0 0 transparent),var(--tw-ring-shadow,0 0 transparent),var(--tw-shadow)}.shadow-inner{--tw-shadow:inset 0 2px 4px 0 rgba(0,0,0,0.06)}.shadow-card{--tw-shadow:0 2px 1px -1px rgba(0,0,0,0.2),0 1px 1px 0 rgba(0,0,0,0.14),0 1px 3px 0 rgba(0,0,0,0.12);box-shadow:var(--tw-ring-offset-shadow,0 0 transparent),var(--tw-ring-shadow,0 0 transparent),var(--tw-shadow)}*{--tw-ring-inset:var(--tw-empty,/*!*/ /*!*/);--tw-ring-offset-width:0px;--tw-ring-offset-color:#fff;--tw-ring-color:rgba(59,130,246,0.5);--tw-ring-offset-shadow:0 0 transparent;--tw-ring-shadow:0 0 transparent}.text-center{text-align:center}.text-black{--tw-text-opacity:1;color:rgba(0,0,0,var(--tw-text-opacity))}.text-white{--tw-text-opacity:1;color:rgba(255,255,255,var(--tw-text-opacity))}.text-gray-400{--tw-text-opacity:1;color:rgba(156,163,175,var(--tw-text-opacity))}.text-gray-500{--tw-text-opacity:1;color:rgba(107,114,128,var(--tw-text-opacity))}.text-gray-700{--tw-text-opacity:1;color:rgba(55,65,81,var(--tw-text-opacity))}.text-red-500{--tw-text-opacity:1;color:rgba(239,68,68,var(--tw-text-opacity))}.text-red-600{--tw-text-opacity:1;color:rgba(220,38,38,var(--tw-text-opacity))}.text-blue-600{--tw-text-opacity:1;color:rgba(37,99,235,var(--tw-text-opacity))}.hover\:text-black:hover{--tw-text-opacity:1;color:rgba(0,0,0,var(--tw-text-opacity))}.hover\:text-white:hover{--tw-text-opacity:1;color:rgba(255,255,255,var(--tw-text-opacity))}.hover\:text-gray-500:hover{--tw-text-opacity:1;color:rgba(107,114,128,var(--tw-text-opacity))}.hover\:text-red-600:hover{--tw-text-opacity:1;color:rgba(220,38,38,var(--tw-text-opacity))}.uppercase{text-transform:uppercase}.underline{text-decoration:underline}.tracking-wider{letter-spacing:.05em}.tracking-widest{letter-spacing:.1em}.whitespace-nowrap{white-space:nowrap}.w-1{width:.25rem}.w-2{width:.5rem}.w-4{width:1rem}.w-5{width:1.25rem}.w-6{width:1.5rem}.w-10{width:2.5rem}.w-14{width:3.5rem}.w-16{width:4rem}.w-36{width:9rem}.w-64{width:16rem}.w-full{width:100%}.gap-12{gap:3rem}.grid-cols-1{grid-template-columns:repeat(1,minmax(0,1fr))}.transform{--tw-translate-x:0;--tw-translate-y:0;--tw-rotate:0;--tw-skew-x:0;--tw-skew-y:0;--tw-scale-x:1;--tw-scale-y:1;transform:translateX(var(--tw-translate-x)) translateY(var(--tw-translate-y)) rotate(var(--tw-rotate)) skewX(var(--tw-skew-x)) skewY(var(--tw-skew-y)) scaleX(var(--tw-scale-x)) scaleY(var(--tw-scale-y))}.hover\:scale-105:hover{--tw-scale-x:1.05;--tw-scale-y:1.05}.hover\:-translate-y-1:hover{--tw-translate-y:-0.25rem}.transition{transition-property:background-color,border-color,color,fill,stroke,opacity,box-shadow,transform,filter,-webkit-backdrop-filter;transition-property:background-color,border-color,color,fill,stroke,opacity,box-shadow,transform,filter,backdrop-filter;transition-property:background-color,border-color,color,fill,stroke,opacity,box-shadow,transform,filter,backdrop-filter,-webkit-backdrop-filter;transition-timing-function:cubic-bezier(.4,0,.2,1);transition-duration:.15s}.ease-in-out{transition-timing-function:cubic-bezier(.4,0,.2,1)}.duration-200{transition-duration:.2s}.duration-500{transition-duration:.5s}@-webkit-keyframes spin{to{transform:rotate(1turn)}}@keyframes spin{to{transform:rotate(1turn)}}@-webkit-keyframes ping{75%,to{transform:scale(2);opacity:0}}@keyframes ping{75%,to{transform:scale(2);opacity:0}}@-webkit-keyframes pulse{50%{opacity:.5}}@keyframes pulse{50%{opacity:.5}}@-webkit-keyframes bounce{0%,to{transform:translateY(-25%);-webkit-animation-timing-function:cubic-bezier(.8,0,1,1);animation-timing-function:cubic-bezier(.8,0,1,1)}50%{transform:none;-webkit-animation-timing-function:cubic-bezier(0,0,.2,1);animation-timing-function:cubic-bezier(0,0,.2,1)}}@keyframes bounce{0%,to{transform:translateY(-25%);-webkit-animation-timing-function:cubic-bezier(.8,0,1,1);animation-timing-function:cubic-bezier(.8,0,1,1)}50%{transform:none;-webkit-animation-timing-function:cubic-bezier(0,0,.2,1);animation-timing-function:cubic-bezier(0,0,.2,1)}}.filter{--tw-blur:var(--tw-empty,/*!*/ /*!*/);--tw-brightness:var(--tw-empty,/*!*/ /*!*/);--tw-contrast:var(--tw-empty,/*!*/ /*!*/);--tw-grayscale:var(--tw-empty,/*!*/ /*!*/);--tw-hue-rotate:var(--tw-empty,/*!*/ /*!*/);--tw-invert:var(--tw-empty,/*!*/ /*!*/);--tw-saturate:var(--tw-empty,/*!*/ /*!*/);--tw-sepia:var(--tw-empty,/*!*/ /*!*/);--tw-drop-shadow:var(--tw-empty,/*!*/ /*!*/);filter:var(--tw-blur) var(--tw-brightness) var(--tw-contrast) var(--tw-grayscale) var(--tw-hue-rotate) var(--tw-invert) var(--tw-saturate) var(--tw-sepia) var(--tw-drop-shadow)}.grayscale{--tw-grayscale:grayscale(100%)}@media (min-width:640px){.sm\:prose{color:#374151;max-width:65ch}.sm\:prose [class~=lead]{color:#4b5563;font-size:1.25em;line-height:1.6;margin-top:1.2em;margin-bottom:1.2em}.sm\:prose a{color:#dc2626;text-decoration:underline;font-weight:500}.sm\:prose a:hover{color:#ef4444}.sm\:prose strong{color:#111827;font-weight:600}.sm\:prose ol[type=A]{--list-counter-style:upper-alpha}.sm\:prose ol[type=a]{--list-counter-style:lower-alpha}.sm\:prose ol[type=I]{--list-counter-style:upper-roman}.sm\:prose ol[type=i]{--list-counter-style:lower-roman}.sm\:prose ol[type="1"]{--list-counter-style:decimal}.sm\:prose ol>li{position:relative;padding-left:1.75em}.sm\:prose ol>li:before{content:counter(list-item,var(--list-counter-style,decimal)) ".";position:absolute;font-weight:400;color:#6b7280;left:0}.sm\:prose ul>li{position:relative;padding-left:1.75em}.sm\:prose ul>li:before{content:"";position:absolute;background-color:#d1d5db;border-radius:50%;width:.375em;height:.375em;top:.6875em;left:.25em}.sm\:prose hr{border-color:#e5e7eb;border-top-width:1px;margin-top:3em;margin-bottom:3em}.sm\:prose blockquote{font-weight:500;font-style:italic;color:#111827;border-left-width:.25rem;border-left-color:#e5e7eb;quotes:"\201C""\201D""\2018""\2019";margin-top:1.6em;margin-bottom:1.6em;padding-left:1em}.sm\:prose blockquote p:first-of-type:before{content:open-quote}.sm\:prose blockquote p:last-of-type:after{content:close-quote}.sm\:prose h1{color:#111827;font-weight:800;font-size:2.25em;margin-top:0;margin-bottom:.8888889em;line-height:1.1111111}.sm\:prose h2{color:#111827;font-weight:700;font-size:1.5em;margin-top:2em;margin-bottom:1em;line-height:1.3333333}.sm\:prose h3{font-size:1.25em;margin-top:1.6em;margin-bottom:.6em;line-height:1.6}.sm\:prose h3,.sm\:prose h4{color:#111827;font-weight:600}.sm\:prose h4{margin-top:1.5em;margin-bottom:.5em;line-height:1.5}.sm\:prose figure figcaption{color:#6b7280;font-size:.875em;line-height:1.4285714;margin-top:.8571429em}.sm\:prose code{color:#111827;font-weight:600;font-size:.875em}.sm\:prose code:after,.sm\:prose code:before{content:"`"}.sm\:prose a code{color:#111827}.sm\:prose pre{color:#e5e7eb;background-color:#1f2937;overflow-x:auto;font-size:.875em;line-height:1.7142857;margin-top:1.7142857em;margin-bottom:1.7142857em;border-radius:.375rem;padding:.8571429em 1.1428571em}.sm\:prose pre code{background-color:transparent;border-width:0;border-radius:0;padding:0;font-weight:400;color:inherit;font-size:inherit;font-family:inherit;line-height:inherit}.sm\:prose pre code:after,.sm\:prose pre code:before{content:none}.sm\:prose table{width:100%;table-layout:auto;text-align:left;margin-top:2em;margin-bottom:2em;font-size:.875em;line-height:1.7142857}.sm\:prose thead{color:#111827;font-weight:600;border-bottom-width:1px;border-bottom-color:#d1d5db}.sm\:prose thead th{vertical-align:bottom;padding-right:.5714286em;padding-bottom:.5714286em;padding-left:.5714286em}.sm\:prose tbody tr{border-bottom-width:1px;border-bottom-color:#e5e7eb}.sm\:prose tbody tr:last-child{border-bottom-width:0}.sm\:prose tbody td{vertical-align:top;padding:.5714286em}.sm\:prose{font-size:1rem;line-height:1.75}.sm\:prose p{margin-top:1.25em;margin-bottom:1.25em}.sm\:prose figure,.sm\:prose img,.sm\:prose video{margin-top:2em;margin-bottom:2em}.sm\:prose figure>*{margin-top:0;margin-bottom:0}.sm\:prose h2 code{font-size:.875em}.sm\:prose h3 code{font-size:.9em}.sm\:prose ol,.sm\:prose ul{margin-top:1.25em;margin-bottom:1.25em}.sm\:prose li{margin-top:.5em;margin-bottom:.5em}.sm\:prose>ul>li p{margin-top:.75em;margin-bottom:.75em}.sm\:prose>ul>li>:first-child{margin-top:1.25em}.sm\:prose>ul>li>:last-child{margin-bottom:1.25em}.sm\:prose>ol>li>:first-child{margin-top:1.25em}.sm\:prose>ol>li>:last-child{margin-bottom:1.25em}.sm\:prose ol ol,.sm\:prose ol ul,.sm\:prose ul ol,.sm\:prose ul ul{margin-top:.75em;margin-bottom:.75em}.sm\:prose h2+*,.sm\:prose h3+*,.sm\:prose h4+*,.sm\:prose hr+*{margin-top:0}.sm\:prose thead th:first-child{padding-left:0}.sm\:prose thead th:last-child{padding-right:0}.sm\:prose tbody td:first-child{padding-left:0}.sm\:prose tbody td:last-child{padding-right:0}.sm\:prose>:first-child{margin-top:0}.sm\:prose>:last-child{margin-bottom:0}.sm\:flex{display:flex}.sm\:flex-row{flex-direction:row}.sm\:items-start{align-items:flex-start}.sm\:items-center{align-items:center}.sm\:flex-1{flex:1 1 0%}.sm\:h-auto{height:auto}.sm\:mb-0{margin-bottom:0}.sm\:mr-6{margin-right:1.5rem}.sm\:px-12{padding-left:3rem;padding-right:3rem}.sm\:text-left{text-align:left}.sm\:w-2\/5{width:40%}.sm\:w-3\/5{width:60%}.sm\:grid-cols-2{grid-template-columns:repeat(2,minmax(0,1fr))}}@media (min-width:1024px){.lg\:w-1\/4{width:25%}.lg\:w-3\/4{width:75%}.lg\:grid-cols-3{grid-template-columns:repeat(3,minmax(0,1fr))}}code[class*=language-],pre[class*=language-]{color:#f8f8f2;background:none;text-shadow:0 1px rgba(0,0,0,.3);font-family:Consolas,Monaco,Andale Mono,Ubuntu Mono,monospace;font-size:1em;text-align:left;white-space:pre;word-spacing:normal;word-break:normal;word-wrap:normal;line-height:1.5;-moz-tab-size:4;-o-tab-size:4;tab-size:4;-webkit-hyphens:none;-ms-hyphens:none;hyphens:none}pre[class*=language-]{padding:1em;margin:.5em 0;overflow:auto;border-radius:.3em}:not(pre)>code[class*=language-],pre[class*=language-]{background:#272822}:not(pre)>code[class*=language-]{padding:.1em;border-radius:.3em;white-space:normal}.token.cdata,.token.comment,.token.doctype,.token.prolog{color:#8292a2}.token.punctuation{color:#f8f8f2}.token.namespace{opacity:.7}.token.constant,.token.deleted,.token.property,.token.symbol,.token.tag{color:#f92672}.token.boolean,.token.number{color:#ae81ff}.token.attr-name,.token.builtin,.token.char,.token.inserted,.token.selector,.token.string{color:#a6e22e}.language-css .token.string,.style .token.string,.token.entity,.token.operator,.token.url,.token.variable{color:#f8f8f2}.token.atrule,.token.attr-value,.token.class-name,.token.function{color:#e6db74}.token.keyword{color:#66d9ef}.token.important,.token.regex{color:#fd971f}.token.bold,.token.important{font-weight:700}.token.italic{font-style:italic}.token.entity{cursor:help}</style><meta name="generator" content="Gatsby 3.4.0"/><title data-react-helmet="true">Generating cooking recipes using TensorFlow and LSTM Recurrent Neural Network: A step-by-step guide | Trekhleb</title><meta data-react-helmet="true" name="description" content="I&#x27;ve trained a character-level LSTM RNN on ~100k recipes dataset using TensorFlow, and it suggested me to cook Cream Soda with Onions, Puff Pastry Strawberry Soup, Zucchini flavor Tea and Salmon Mousse of Beef and Stilton Salad with Jalapenos"/><meta data-react-helmet="true" name="image" content="https://trekhleb.dev/static/05340351fcb1004d0e25300d243978cd/e7ab0/01-cover.jpg"/><meta data-react-helmet="true" property="og:title" content="Generating cooking recipes using TensorFlow and LSTM Recurrent Neural Network: A step-by-step guide | Trekhleb"/><meta data-react-helmet="true" property="og:description" content="I&#x27;ve trained a character-level LSTM RNN on ~100k recipes dataset using TensorFlow, and it suggested me to cook Cream Soda with Onions, Puff Pastry Strawberry Soup, Zucchini flavor Tea and Salmon Mousse of Beef and Stilton Salad with Jalapenos"/><meta data-react-helmet="true" property="og:url" content="https://trekhleb.dev/blog/2020/recipes-generation/"/><meta data-react-helmet="true" property="og:image" content="https://trekhleb.dev/static/05340351fcb1004d0e25300d243978cd/e7ab0/01-cover.jpg"/><meta data-react-helmet="true" property="og:type" content="article"/><meta data-react-helmet="true" name="twitter:card" content="summary_large_image"/><meta data-react-helmet="true" name="twitter:creator" content="@Trekhleb"/><meta data-react-helmet="true" name="twitter:title" content="Generating cooking recipes using TensorFlow and LSTM Recurrent Neural Network: A step-by-step guide | Trekhleb"/><meta data-react-helmet="true" name="twitter:description" content="I&#x27;ve trained a character-level LSTM RNN on ~100k recipes dataset using TensorFlow, and it suggested me to cook Cream Soda with Onions, Puff Pastry Strawberry Soup, Zucchini flavor Tea and Salmon Mousse of Beef and Stilton Salad with Jalapenos"/><meta data-react-helmet="true" name="twitter:image" content="https://trekhleb.dev/static/05340351fcb1004d0e25300d243978cd/e7ab0/01-cover.jpg"/><meta data-react-helmet="true" name="twitter:url" content="https://trekhleb.dev/blog/2020/recipes-generation/"/><link rel="preconnect dns-prefetch" href="https://www.google-analytics.com"/><link rel="alternate" type="application/rss+xml" title="Trekhleb.dev RSS Feed" href="/rss.xml"/><link as="script" rel="preload" href="/webpack-runtime-98886e4c056db07027a1.js"/><link as="script" rel="preload" href="/framework-d63adeb7e1b44b7b8aa5.js"/><link as="script" rel="preload" href="/app-2e0826ec06cafce3bdee.js"/><link as="script" rel="preload" href="/commons-213c962999d4e181c8a0.js"/><link as="script" rel="preload" href="/component---src-templates-post-tsx-c4045391b1a7c095d609.js"/><link as="fetch" rel="preload" href="/page-data/blog/2020/recipes-generation/page-data.json" crossorigin="anonymous"/><link as="fetch" rel="preload" href="/page-data/app-data.json" crossorigin="anonymous"/></head><body><div id="___gatsby"><div style="outline:none" tabindex="-1" id="gatsby-focus-wrapper"><main class="flex flex-col items-center"><div class="max-w-screen-xl self-stretch m-auto w-full"><header class="flex flex-row items-center px-6 sm:px-12 py-6"><div class="mr-6"><div><a class="transition duration-200 ease-in-out flex flex-row items-center hover:text-red-600 font-extrabold text-sm tracking-widest uppercase" href="/">Trekhleb</a></div></div><nav><ul class="flex flex-row"><li class="ml-5"><a class="transition duration-200 ease-in-out flex flex-row items-center hover:text-red-600 uppercase text-xs" href="/">About</a></li><li class="ml-5"><a class="transition duration-200 ease-in-out flex flex-row items-center hover:text-red-600 uppercase text-xs" href="/projects/">Projects</a></li><li class="ml-5"><a class="transition duration-200 ease-in-out flex flex-row items-center hover:text-red-600 uppercase text-xs" href="/blog/">Blog</a></li></ul></nav></header><article class="px-6 sm:px-12 py-6"><div class="flex flex-col items-center"><article class="w-full prose prose-sm sm:prose overflow-hidden prose-red" style="max-width:860px"><h1 class="text-3xl mb-6 uppercase font-extrabold ">Generating cooking recipes using TensorFlow and LSTM Recurrent Neural Network: A step-by-step guide</h1><div class="flex flex-row items-center "><div class="flex flex-row items-center mr-6"><svg stroke="currentColor" fill="none" stroke-width="2" viewBox="0 0 24 24" stroke-linecap="round" stroke-linejoin="round" class="mr-1" height="1em" width="1em" xmlns="http://www.w3.org/2000/svg"><rect x="3" y="4" width="18" height="18" rx="2" ry="2"></rect><line x1="16" y1="2" x2="16" y2="6"></line><line x1="8" y1="2" x2="8" y2="6"></line><line x1="3" y1="10" x2="21" y2="10"></line></svg>18 June, 2020</div><div class="flex flex-row items-center "><svg stroke="currentColor" fill="none" stroke-width="2" viewBox="0 0 24 24" stroke-linecap="round" stroke-linejoin="round" class="mr-1" height="1em" width="1em" xmlns="http://www.w3.org/2000/svg"><circle cx="12" cy="12" r="10"></circle><polyline points="12 6 12 12 16 14"></polyline></svg>13<!-- --> min to read</div></div><h2 id="tldr" style="position:relative">TL;DR<a href="#tldr" aria-label="tldr permalink" class="gatsby-remark-autolink-header-anchor after"><svg aria-hidden="true" focusable="false" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a></h2><p>I&#x27;ve trained a character-level LSTM <em>(Long short-term memory)</em> RNN <em>(Recurrent Neural Network)</em> on <em>~100k</em> recipes dataset using TensorFlow, and it suggested me to cook <em>&quot;Cream Soda with Onions&quot;</em>, <em>&quot;Puff Pastry Strawberry Soup&quot;</em>, <em>&quot;Zucchini flavor Tea&quot;</em> and <em>&quot;Salmon Mousse of Beef and Stilton Salad with Jalapenos&quot;</em> .</p><p>Here you may find more examples of what I ended up with:</p><ul><li>🎨 <a href="https://trekhleb.dev/machine-learning-experiments/#/experiments/RecipeGenerationRNN">Cooking recipes generator demo</a> - to try the model interactively right in your browser.</li><li>🏋🏻‍ <a href="https://github.com/trekhleb/machine-learning-experiments/blob/master/experiments/recipe_generation_rnn/recipe_generation_rnn.ipynb">LSTM model training process</a> - to see how the model was trained.</li><li><a href="https://github.com/trekhleb/machine-learning-experiments">🤖 Interactive Machine Learning Experiments</a> repository - to see more experiments with &quot;Objects detection&quot;, &quot;Sketch Recognition&quot;, &quot;Image Classification&quot; etc.</li></ul><p>This article contains details of how the LSTM model was actually trained on Python using <a href="https://www.tensorflow.org/">TensorFlow 2</a> with <a href="https://www.tensorflow.org/guide/keras">Keras API</a>.</p><p><img src="/posts-assets/4cdea1757c71da83958f357939b859e2/02-demo.gif" alt="Cooking recipes generator demo"/></p><h2 id="what-our-model-will-eventually-learn" style="position:relative">What our model will eventually learn<a href="#what-our-model-will-eventually-learn" aria-label="what our model will eventually learn permalink" class="gatsby-remark-autolink-header-anchor after"><svg aria-hidden="true" focusable="false" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a></h2><p>For a couple of hours of training our character-level RNN model will learn basic concepts of English grammar and punctuation (I wish I could learn English that fast!). It will also learn how to generate different parts of recipes such as <em>📗 <!-- -->[RECIPE NAME]</em>, <em>🥕 <!-- -->[RECIPE INGREDIENTS]</em> and <em>📝 <!-- -->[RECIPE INSTRUCTIONS]</em>. Sometimes recipe name, ingredients and instructions will be pretty interesting, sometimes stupid, sometimes fun.</p><p>Here is a couple of generated recipes examples:</p><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">📗 [NAME]

Orange Club Tea Sandwich Cookies

🥕 [INGREDIENTS]

• 1 cup (2 sticks) unsalted butter, softened
• 1 cup confectioners&#x27; sugar
• 1/2 cup flaxseed meal
• 1/2 cup shelled pumpkin seeds (pecans, blanched and sliced)
• 2 teaspoons vanilla extract

📝 [INSTRUCTIONS]

▪︎ Preheat oven to 350 degrees F.
▪︎ Combine cake mix, milk, egg and sugar in a large bowl. Stir until combined and smooth but not sticky. Using a spatula, sprinkle the dough biscuits over the bottom of the pan. Sprinkle with sugar, and spread evenly. Bake for 20 minutes. Remove from the oven and cool on a rack. To serve, add the chocolate.</code></pre></div><p>Or another one:</p><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">📗 [NAME]

Mushrooms with Lentil Stewed Shallots and Tomatoes

🥕 [INGREDIENTS]

• 1 tablespoon olive oil
• 3 cloves garlic, smashed
• Kosher salt
• 1 1/2 pounds lean ground turkey
• 1 cup coarsely peeled tart apples
• 2 tablespoons chopped garlic
• 1 teaspoon ground cumin
• 1/2 teaspoon cayenne pepper
• 1 teaspoon chopped fresh thyme
• 3/4 cup chopped fresh basil
• 1/2 small carrot, halved lengthwise and cut into 1/2-inch pieces
• 1 roasted red pepper, halved and sliced vertically diced and separated into rough chops
• 3 tablespoons unsalted butter
• 2 cups shredded mozzarella
• 1/4 cup grated parmesan cheese
• 1/4 cup prepared basil pesto

📝 [INSTRUCTIONS]

▪︎ Stir the olive oil, garlic, thyme and 1 teaspoon salt in a saucepan; bring to a simmer over medium heat. Remove from the heat. Add the basil and toast the soup for 2 minutes.
▪︎ Meanwhile, heat 4 to 4 inches vegetable oil in the skillet over medium-high heat. Add the olive oil, garlic, 1/2 teaspoon salt and 1/2 teaspoon pepper and cook, stirring often, until cooked through, a</code></pre></div><p><span class="gatsby-resp-image-wrapper" style="position:relative;display:block;margin-left:auto;margin-right:auto;max-width:946px">
      <span class="gatsby-resp-image-background-image" style="padding-bottom:100%;position:relative;bottom:0;left:0;background-image:url(&#x27;data:image/jpeg;base64,/9j/2wBDABALDA4MChAODQ4SERATGCgaGBYWGDEjJR0oOjM9PDkzODdASFxOQERXRTc4UG1RV19iZ2hnPk1xeXBkeFxlZ2P/2wBDARESEhgVGC8aGi9jQjhCY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2P/wgARCAAUABQDASIAAhEBAxEB/8QAFwABAQEBAAAAAAAAAAAAAAAAAAMCAf/EABYBAQEBAAAAAAAAAAAAAAAAAAEDAP/aAAwDAQACEAMQAAABrlaVossduOwJv//EABoQAQEAAwEBAAAAAAAAAAAAAAECABIhMkH/2gAIAQEAAQUCjgQKUzj11NfRHM1Jz7//xAAVEQEBAAAAAAAAAAAAAAAAAAAQAf/aAAgBAwEBPwEp/8QAFhEBAQEAAAAAAAAAAAAAAAAAEAIx/9oACAECAQE/AScP/8QAHxAAAgECBwAAAAAAAAAAAAAAABEBAhIQISIxMkFR/9oACAEBAAY/AlJya6JFTU0VW6Z9Im02MsP/xAAcEAEAAgMBAQEAAAAAAAAAAAABABEhMUFhUXH/2gAIAQEAAT8hv9TdktQAKYQHvWVCxAp24HgIWhg3C1KOwP7yOcMr32O0/9oADAMBAAIAAwAAABDzPwL/xAAXEQADAQAAAAAAAAAAAAAAAAABEBEx/9oACAEDAQE/EKJVtf/EABgRAQADAQAAAAAAAAAAAAAAAAEAEBEx/9oACAECAQE/EExyYlD2f//EABwQAQEAAgMBAQAAAAAAAAAAAAERACExUWFBcf/aAAgBAQABPxBtIEzYvd3hFCENUe57kUu2zg8wBSIn6J2R4yvubIPfMMUKB3b7kNkmovL5gJ3Qqn0XrCQhBnBn/9k=&#x27;);background-size:cover;display:block"></span>
  <img class="gatsby-resp-image-image" alt="Cook real recipes, not with generated ones" title="Cook real recipes, not with generated ones" src="/static/bd0d3256b5e96aafe4d1ee2392dd3b59/56d85/09.jpg" srcSet="/static/bd0d3256b5e96aafe4d1ee2392dd3b59/0479a/09.jpg 250w,/static/bd0d3256b5e96aafe4d1ee2392dd3b59/41099/09.jpg 500w,/static/bd0d3256b5e96aafe4d1ee2392dd3b59/56d85/09.jpg 946w" sizes="(max-width: 946px) 100vw, 946px" style="width:100%;height:100%;margin:0;vertical-align:middle;position:absolute;top:0;left:0" loading="lazy"/>
    </span></p><p>⚠️ The recipes in this article are generated just for fun and for learning purposes. The recipes are <strong>not</strong> for actual cooking! If you want some real recipes you may check 🥦 <a href="https://www.instagram.com/home_full_of_recipes/">home_full_of_recipes</a> Instagram channel.</p><h2 id="prior-knowledge" style="position:relative">Prior knowledge<a href="#prior-knowledge" aria-label="prior knowledge permalink" class="gatsby-remark-autolink-header-anchor after"><svg aria-hidden="true" focusable="false" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a></h2><p>It is assumed that you&#x27;re already familiar with concepts of <a href="https://en.wikipedia.org/wiki/Recurrent_neural_network">Recurrent Neural Networks (RNNs)</a> and with <a href="https://en.wikipedia.org/wiki/Long_short-term_memory">Long short-term memory (LSTM)</a> architecture in particular.</p><p>ℹ️ In case if these concepts are new to you I would highly recommend taking a <a href="https://www.coursera.org/specializations/deep-learning">Deep Learning Specialization</a> on Coursera by <em>Andrew Ng</em>. It also might be beneficial to go through the <a href="http://karpathy.github.io/2015/05/21/rnn-effectiveness/">Unreasonable Effectiveness of Recurrent Neural Networks</a> article by <em>Andrej Karpathy</em>.</p><p>On a high level, <strong>Recurrent Neural Network (RNN)</strong> is a class of deep neural networks, most commonly applied to sequence-based data like speech, voice, text or music. They are used for machine translation, speech recognition, voice synthesis etc. The key feature of RNNs is that they are stateful, and they have an internal memory in which some context for the sequence may be stored. For example if the first word of the sequence was <code class="language-text">He</code> the RNN might suggest the next word to <code class="language-text">speaks</code> instead of just <code class="language-text">speak</code> (to form a <code class="language-text">He speaks</code> phrase) because the prior knowledge about the first word <code class="language-text">He</code> is already inside the internal memory.</p><p><img src="/posts-assets/79e174934c03239fc046bf55357ce7bd/0.svg" alt="Recurrent Neural Network"/></p><blockquote><p><em>Image source: <a href="https://en.wikipedia.org/wiki/Recurrent_neural_network">Wikipedia</a></em></p></blockquote><p><span class="gatsby-resp-image-wrapper" style="position:relative;display:block;margin-left:auto;margin-right:auto;max-width:1000px">
      <span class="gatsby-resp-image-background-image" style="padding-bottom:63.6%;position:relative;bottom:0;left:0;background-image:url(&#x27;data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAANCAIAAAAmMtkJAAAACXBIWXMAAAsTAAALEwEAmpwYAAACNklEQVQoz21SW08aQRTef8tL/Qs+tjapmrRNaaVeKrUKQSVGlGy91bCKICgrltpVayLE5TLD3mZ3lp2Z08CCmrTnZeZcvjlnzvdJMDJCyM3tLWMM/mdCCADQdR1j/OhK/RMZQu8Ij3LGQQBgA1AXBukRFDg2RLPNPMo4fwxLXIiWgfROC3ukG1BESctEutEJnhUBgO951LFF/x3BQPD+BSTcxTMHW2/3NmYrucnN5dnLo0+nyrvj3d93NzDoHQRBqlqYUeTEVSm6sxFT5M/5nQ/Hcu3hXjKp++1kL5VNL2XWxleSmcLem839ySPlweoM5gUueOLX6fjKbLz0I5pdnc6mXm2tT5/s/rGR1OW9ZPFAjs+nU8mXh+XMWiKaSUyVdhuuGS5GAKS084mN5fXFufR6YvtwayKvvq9WGp4hdV3n4+H24uXJF1X5Ws0taaX0/c85VbnWG49bTV2Vp76nF85zseL+fEVZuDyO357fE0MCAJsQ5vvUNHkv6BFCbZuL57sGy3W7xHGoh9tt3G7ZruMEPca5NMw7RHSwMCxAWHQwBEz8y3XABiwa4PeGVAEAdz1O/aFUAvD5UBWO44Sa8X3fsqwR5U82GDsWt2fiYep1tr1aNvpBh0QiEVVVAUCW5bEXYwDAAnataZZpPoGbavXhrBL6Z9fNu6YVdisWi6EY6/V6uVwOCxBCnueNwEIYhFieG/o9SnjQ/5Kmablc7uLiAgBqtVo+n6eUIoQKhYKqqq7br/8L0Ma9q43PlL8AAAAASUVORK5CYII=&#x27;);background-size:cover;display:block"></span>
  <img class="gatsby-resp-image-image" alt="Basic architectures of GRU and LSTM cells" title="Basic architectures of GRU and LSTM cells" src="/static/11ecf20ea5599f22b8bf1c392290aefe/00d43/1.png" srcSet="/static/11ecf20ea5599f22b8bf1c392290aefe/63868/1.png 250w,/static/11ecf20ea5599f22b8bf1c392290aefe/0b533/1.png 500w,/static/11ecf20ea5599f22b8bf1c392290aefe/00d43/1.png 1000w,/static/11ecf20ea5599f22b8bf1c392290aefe/2cefc/1.png 1400w" sizes="(max-width: 1000px) 100vw, 1000px" style="width:100%;height:100%;margin:0;vertical-align:middle;position:absolute;top:0;left:0" loading="lazy"/>
    </span></p><blockquote><p><em>Image source: <a href="https://towardsdatascience.com/illustrated-guide-to-lstms-and-gru-s-a-step-by-step-explanation-44e9eb85bf21">Towards Data Science</a></em></p></blockquote><p>Exciting part is that RNN (and LSTM in particular) could memorize not only <em>word-to-word</em> dependencies but also <em>character-to-character</em> dependencies! It doesn&#x27;t really matter what sequence consists of: it might be words it might be characters. What is important is that they form a time-distributed sequence. For example, we have a sequence of characters <code class="language-text">[&#x27;H&#x27;, &#x27;e&#x27;]</code>. If we ask LSTM what may go next it may suggest a <code class="language-text">&lt;stop_word&gt;</code> (meaning, that the sequence that forms word <code class="language-text">He</code> is already complete, and we may stop), or it may also suggest a character <code class="language-text">l</code> (meaning, that it tries to build a <code class="language-text">Hello</code> sequence for us). This type of RNNs are called <strong>character-level RNNs</strong> (as opposed to <strong>word-level RNNs</strong>).</p><p>In this tutorial we will rely on this memorization feature of RNN networks, and we will use a character-level version of LSTM to generate cooking recipes.</p><h2 id="exploring-the-datasets" style="position:relative">Exploring the datasets<a href="#exploring-the-datasets" aria-label="exploring the datasets permalink" class="gatsby-remark-autolink-header-anchor after"><svg aria-hidden="true" focusable="false" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a></h2><p>Let&#x27;s go through several available datasets and explore their pros and cons. One of the requirements I want the dataset to meet is that it should have not only a list of ingredients but also a cooking instruction. I also want it to have a measures and quantities for each ingredient.</p><p>Here are several cooking recipes datasets I&#x27;ve found:</p><ul><li>🤷 <a href="https://www.kaggle.com/kaggle/recipe-ingredients-dataset/home">Recipe Ingredients Dataset</a> <em>(doesn&#x27;t have ingredients proportions)</em></li><li>🤷 <a href="http://pic2recipe.csail.mit.edu/">Recipe1M+</a> <em>(a lot of recipes but requires registration to download)</em></li><li>🤷 <a href="https://www.kaggle.com/hugodarwood/epirecipes?select=full_format_recipes.json">Epicurious - Recipes with Rating and Nutrition</a> <em>(~20k recipes only, it would be nice to find more)</em></li><li>👍🏻 <a href="https://eightportions.com/datasets/Recipes/">Recipe box</a> <em>(~125,000 recipes with ingredients proportions, good)</em></li></ul><p>Let&#x27;s try to use the &quot;Recipe box&quot; dataset. The number of recipes looks big enough, also it contains both ingredients and cooking instructions. It is interesting to see if RNN will be able to learn a connection between ingredients and instructions.</p><h2 id="setting-tensorflowpython-sandbox-for-training" style="position:relative">Setting TensorFlow/Python sandbox for training<a href="#setting-tensorflowpython-sandbox-for-training" aria-label="setting tensorflowpython sandbox for training permalink" class="gatsby-remark-autolink-header-anchor after"><svg aria-hidden="true" focusable="false" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a></h2><p>There are several options you may follow to experiment with the code in this tutorial:</p><ol><li>You may experiment by using <a href="https://colab.research.google.com/github/trekhleb/machine-learning-experiments/blob/master/experiments/recipe_generation_rnn/recipe_generation_rnn.ipynb">GoogleColab right in your browser</a> <em>(no local setup is needed)</em>.</li><li>You may experiment by using <a href="https://mybinder.org/v2/gh/trekhleb/machine-learning-experiments/master?filepath=experiments/recipe_generation_rnn/recipe_generation_rnn.ipynb">Jupyter notebook in Binder right in your browser</a> <em>(no local setup is needed)</em>.</li><li>You may <a href="https://github.com/trekhleb/machine-learning-experiments#how-to-use-this-repository-locally">setup a Jupyter notebook locally</a>.</li></ol><p>I would suggest going with GoogleColab option since it doesn&#x27;t require any local setup for you (you may experiment right in your browser), and it also provides a powerful GPU support for training that will make the model to train faster. You will be able to experiment with training parameters as well.</p><h2 id="importing-dependencies" style="position:relative">Importing dependencies<a href="#importing-dependencies" aria-label="importing dependencies permalink" class="gatsby-remark-autolink-header-anchor after"><svg aria-hidden="true" focusable="false" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a></h2><p>Let&#x27;s start with importing some packages that we will use afterwards.</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python"><span class="token comment"># Packages for training the model and working with the dataset.</span>
<span class="token keyword">import</span> tensorflow <span class="token keyword">as</span> tf
<span class="token keyword">import</span> matplotlib<span class="token punctuation">.</span>pyplot <span class="token keyword">as</span> plt
<span class="token keyword">import</span> numpy <span class="token keyword">as</span> np
<span class="token keyword">import</span> json

<span class="token comment"># Utility/helper packages.</span>
<span class="token keyword">import</span> platform
<span class="token keyword">import</span> time
<span class="token keyword">import</span> pathlib
<span class="token keyword">import</span> os</code></pre></div><p>First, let&#x27;s make sure our environment is properly set up and that we&#x27;re using a <em>2nd</em> version of Tensorflow.</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python"><span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;Python version:&#x27;</span><span class="token punctuation">,</span> platform<span class="token punctuation">.</span>python_version<span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;Tensorflow version:&#x27;</span><span class="token punctuation">,</span> tf<span class="token punctuation">.</span>__version__<span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;Keras version:&#x27;</span><span class="token punctuation">,</span> tf<span class="token punctuation">.</span>keras<span class="token punctuation">.</span>__version__<span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">Python version: 3.7.6
Tensorflow version: 2.1.0
Keras version: 2.2.4-tf</code></pre></div></blockquote><h2 id="loading-the-dataset" style="position:relative">Loading the dataset<a href="#loading-the-dataset" aria-label="loading the dataset permalink" class="gatsby-remark-autolink-header-anchor after"><svg aria-hidden="true" focusable="false" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a></h2><p>Let&#x27;s load the dataset using <a href="https://www.tensorflow.org/api_docs/python/tf/keras/utils/get_file">tf.keras.utils.get_file</a>. Using <code class="language-text">get_file()</code> utility is convenient because it handles caching for you out of the box. It means that you will download the dataset files only once and then even if you launch the same code block in the notebook once again it will use cache, and the code block will be executed faster.</p><p>Create cache folder if it not exists:</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python">CACHE_DIR <span class="token operator">=</span> <span class="token string">&#x27;./tmp&#x27;</span>
pathlib<span class="token punctuation">.</span>Path<span class="token punctuation">(</span>CACHE_DIR<span class="token punctuation">)</span><span class="token punctuation">.</span>mkdir<span class="token punctuation">(</span>exist_ok<span class="token operator">=</span><span class="token boolean">True</span><span class="token punctuation">)</span></code></pre></div><p>Download and unpack the dataset:</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python">dataset_file_name <span class="token operator">=</span> <span class="token string">&#x27;recipes_raw.zip&#x27;</span>
dataset_file_origin <span class="token operator">=</span> <span class="token string">&#x27;https://storage.googleapis.com/recipe-box/recipes_raw.zip&#x27;</span>

dataset_file_path <span class="token operator">=</span> tf<span class="token punctuation">.</span>keras<span class="token punctuation">.</span>utils<span class="token punctuation">.</span>get_file<span class="token punctuation">(</span>
    fname<span class="token operator">=</span>dataset_file_name<span class="token punctuation">,</span>
    origin<span class="token operator">=</span>dataset_file_origin<span class="token punctuation">,</span>
    cache_dir<span class="token operator">=</span>CACHE_DIR<span class="token punctuation">,</span>
    extract<span class="token operator">=</span><span class="token boolean">True</span><span class="token punctuation">,</span>
    archive_format<span class="token operator">=</span><span class="token string">&#x27;zip&#x27;</span>
<span class="token punctuation">)</span>

<span class="token keyword">print</span><span class="token punctuation">(</span>dataset_file_path<span class="token punctuation">)</span></code></pre></div><p>Here is a path to dataset file after it has been downloaded:</p><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="shell"><pre class="language-shell"><code class="language-shell">./tmp/datasets/recipes_raw.zip</code></pre></div></blockquote><p>Let&#x27;s print the cache folder and see what exactly has been downloaded:</p><div class="gatsby-highlight" data-language="shell"><pre class="language-shell"><code class="language-shell"><span class="token operator">!</span>ls -la ./tmp/datasets/</code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">total 521128
drwxr-xr-x  7        224 May 13 18:10 .
drwxr-xr-x  4        128 May 18 18:00 ..
-rw-r--r--  1      20437 May 20 06:46 LICENSE
-rw-r--r--  1   53355492 May 13 18:10 recipes_raw.zip
-rw-r--r--  1   49784325 May 20 06:46 recipes_raw_nosource_ar.json
-rw-r--r--  1   61133971 May 20 06:46 recipes_raw_nosource_epi.json
-rw-r--r--  1   93702755 May 20 06:46 recipes_raw_nosource_fn.json</code></pre></div></blockquote><p>As you may see, the dataset consists of <em>3</em> files. We need to merge information from those <em>3</em> files into one dataset later.</p><p>Let&#x27;s load datasets data from <code class="language-text">json</code> files and preview examples from them.</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python"><span class="token keyword">def</span> <span class="token function">load_dataset</span><span class="token punctuation">(</span>silent<span class="token operator">=</span><span class="token boolean">False</span><span class="token punctuation">)</span><span class="token punctuation">:</span>
    <span class="token comment"># List of dataset files we want to merge.</span>
    dataset_file_names <span class="token operator">=</span> <span class="token punctuation">[</span>
        <span class="token string">&#x27;recipes_raw_nosource_ar.json&#x27;</span><span class="token punctuation">,</span>
        <span class="token string">&#x27;recipes_raw_nosource_epi.json&#x27;</span><span class="token punctuation">,</span>
        <span class="token string">&#x27;recipes_raw_nosource_fn.json&#x27;</span><span class="token punctuation">,</span>
    <span class="token punctuation">]</span>

    dataset <span class="token operator">=</span> <span class="token punctuation">[</span><span class="token punctuation">]</span>

    <span class="token keyword">for</span> dataset_file_name <span class="token keyword">in</span> dataset_file_names<span class="token punctuation">:</span>
        dataset_file_path <span class="token operator">=</span> <span class="token string-interpolation"><span class="token string">f&#x27;</span><span class="token interpolation"><span class="token punctuation">{</span>CACHE_DIR<span class="token punctuation">}</span></span><span class="token string">/datasets/</span><span class="token interpolation"><span class="token punctuation">{</span>dataset_file_name<span class="token punctuation">}</span></span><span class="token string">&#x27;</span></span>

        <span class="token keyword">with</span> <span class="token builtin">open</span><span class="token punctuation">(</span>dataset_file_path<span class="token punctuation">)</span> <span class="token keyword">as</span> dataset_file<span class="token punctuation">:</span>
            json_data_dict <span class="token operator">=</span> json<span class="token punctuation">.</span>load<span class="token punctuation">(</span>dataset_file<span class="token punctuation">)</span>
            json_data_list <span class="token operator">=</span> <span class="token builtin">list</span><span class="token punctuation">(</span>json_data_dict<span class="token punctuation">.</span>values<span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">)</span>
            dict_keys <span class="token operator">=</span> <span class="token punctuation">[</span>key <span class="token keyword">for</span> key <span class="token keyword">in</span> json_data_list<span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span><span class="token punctuation">]</span>
            dict_keys<span class="token punctuation">.</span>sort<span class="token punctuation">(</span><span class="token punctuation">)</span>
            dataset <span class="token operator">+=</span> json_data_list

            <span class="token comment"># This code block outputs the summary for each dataset.</span>
            <span class="token keyword">if</span> silent <span class="token operator">==</span> <span class="token boolean">False</span><span class="token punctuation">:</span>
                <span class="token keyword">print</span><span class="token punctuation">(</span>dataset_file_path<span class="token punctuation">)</span>
                <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;===========================================&#x27;</span><span class="token punctuation">)</span>
                <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;Number of examples: &#x27;</span><span class="token punctuation">,</span> <span class="token builtin">len</span><span class="token punctuation">(</span>json_data_list<span class="token punctuation">)</span><span class="token punctuation">,</span> <span class="token string">&#x27;\n&#x27;</span><span class="token punctuation">)</span>
                <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;Example object keys:\n&#x27;</span><span class="token punctuation">,</span> dict_keys<span class="token punctuation">,</span> <span class="token string">&#x27;\n&#x27;</span><span class="token punctuation">)</span>
                <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;Example object:\n&#x27;</span><span class="token punctuation">,</span> json_data_list<span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span><span class="token punctuation">,</span> <span class="token string">&#x27;\n&#x27;</span><span class="token punctuation">)</span>
                <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;Required keys:\n&#x27;</span><span class="token punctuation">)</span>
                <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;  title: &#x27;</span><span class="token punctuation">,</span> json_data_list<span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span><span class="token punctuation">[</span><span class="token string">&#x27;title&#x27;</span><span class="token punctuation">]</span><span class="token punctuation">,</span> <span class="token string">&#x27;\n&#x27;</span><span class="token punctuation">)</span>
                <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;  ingredients: &#x27;</span><span class="token punctuation">,</span> json_data_list<span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span><span class="token punctuation">[</span><span class="token string">&#x27;ingredients&#x27;</span><span class="token punctuation">]</span><span class="token punctuation">,</span> <span class="token string">&#x27;\n&#x27;</span><span class="token punctuation">)</span>
                <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;  instructions: &#x27;</span><span class="token punctuation">,</span> json_data_list<span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span><span class="token punctuation">[</span><span class="token string">&#x27;instructions&#x27;</span><span class="token punctuation">]</span><span class="token punctuation">)</span>
                <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;\n\n&#x27;</span><span class="token punctuation">)</span>

    <span class="token keyword">return</span> dataset

dataset_raw <span class="token operator">=</span> load_dataset<span class="token punctuation">(</span><span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">./tmp/datasets/recipes_raw_nosource_ar.json
===========================================
Number of examples:  39802

Example object keys:
 [&#x27;ingredients&#x27;, &#x27;instructions&#x27;, &#x27;picture_link&#x27;, &#x27;title&#x27;]

Example object:
 {&#x27;title&#x27;: &#x27;Slow Cooker Chicken and Dumplings&#x27;, &#x27;ingredients&#x27;: [&#x27;4 skinless, boneless chicken breast halves ADVERTISEMENT&#x27;, &#x27;2 tablespoons butter ADVERTISEMENT&#x27;, &#x27;2 (10.75 ounce) cans condensed cream of chicken soup ADVERTISEMENT&#x27;, &#x27;1 onion, finely diced ADVERTISEMENT&#x27;, &#x27;2 (10 ounce) packages refrigerated biscuit dough, torn into pieces ADVERTISEMENT&#x27;, &#x27;ADVERTISEMENT&#x27;], &#x27;instructions&#x27;: &#x27;Place the chicken, butter, soup, and onion in a slow cooker, and fill with enough water to cover.\nCover, and cook for 5 to 6 hours on High. About 30 minutes before serving, place the torn biscuit dough in the slow cooker. Cook until the dough is no longer raw in the center.\n&#x27;, &#x27;picture_link&#x27;: &#x27;55lznCYBbs2mT8BTx6BTkLhynGHzM.S&#x27;}

Required keys:

  title:  Slow Cooker Chicken and Dumplings

  ingredients:  [&#x27;4 skinless, boneless chicken breast halves ADVERTISEMENT&#x27;, &#x27;2 tablespoons butter ADVERTISEMENT&#x27;, &#x27;2 (10.75 ounce) cans condensed cream of chicken soup ADVERTISEMENT&#x27;, &#x27;1 onion, finely diced ADVERTISEMENT&#x27;, &#x27;2 (10 ounce) packages refrigerated biscuit dough, torn into pieces ADVERTISEMENT&#x27;, &#x27;ADVERTISEMENT&#x27;]

  instructions:  Place the chicken, butter, soup, and onion in a slow cooker, and fill with enough water to cover.
Cover, and cook for 5 to 6 hours on High. About 30 minutes before serving, place the torn biscuit dough in the slow cooker. Cook until the dough is no longer raw in the center.


./tmp/datasets/recipes_raw_nosource_epi.json
===========================================
Number of examples:  25323

Example object keys:
 [&#x27;ingredients&#x27;, &#x27;instructions&#x27;, &#x27;picture_link&#x27;, &#x27;title&#x27;]

Example object:
 {&#x27;ingredients&#x27;: [&#x27;12 egg whites&#x27;, &#x27;12 egg yolks&#x27;, &#x27;1 1/2 cups sugar&#x27;, &#x27;3/4 cup rye whiskey&#x27;, &#x27;12 egg whites&#x27;, &#x27;3/4 cup brandy&#x27;, &#x27;1/2 cup rum&#x27;, &#x27;1 to 2 cups heavy cream, lightly whipped&#x27;, &#x27;Garnish: ground nutmeg&#x27;], &#x27;picture_link&#x27;: None, &#x27;instructions&#x27;: &#x27;Beat the egg whites until stiff, gradually adding in 3/4 cup sugar. Set aside. Beat the egg yolks until they are thick and pale and add the other 3/4 cup sugar and stir in rye whiskey. Blend well. Fold the egg white mixture into the yolk mixture and add the brandy and the rum. Beat the mixture well. To serve, fold the lightly whipped heavy cream into the eggnog. (If a thinner mixture is desired, add the heavy cream unwhipped.) Sprinkle the top of the eggnog with the nutmeg to taste.\nBeat the egg whites until stiff, gradually adding in 3/4 cup sugar. Set aside. Beat the egg yolks until they are thick and pale and add the other 3/4 cup sugar and stir in rye whiskey. Blend well. Fold the egg white mixture into the yolk mixture and add the brandy and the rum. Beat the mixture well. To serve, fold the lightly whipped heavy cream into the eggnog. (If a thinner mixture is desired, add the heavy cream unwhipped.) Sprinkle the top of the eggnog with the nutmeg to taste.&#x27;, &#x27;title&#x27;: &#x27;Christmas Eggnog &#x27;}

Required keys:

  title:  Christmas Eggnog

  ingredients:  [&#x27;12 egg whites&#x27;, &#x27;12 egg yolks&#x27;, &#x27;1 1/2 cups sugar&#x27;, &#x27;3/4 cup rye whiskey&#x27;, &#x27;12 egg whites&#x27;, &#x27;3/4 cup brandy&#x27;, &#x27;1/2 cup rum&#x27;, &#x27;1 to 2 cups heavy cream, lightly whipped&#x27;, &#x27;Garnish: ground nutmeg&#x27;]

  instructions:  Beat the egg whites until stiff, gradually adding in 3/4 cup sugar. Set aside. Beat the egg yolks until they are thick and pale and add the other 3/4 cup sugar and stir in rye whiskey. Blend well. Fold the egg white mixture into the yolk mixture and add the brandy and the rum. Beat the mixture well. To serve, fold the lightly whipped heavy cream into the eggnog. (If a thinner mixture is desired, add the heavy cream unwhipped.) Sprinkle the top of the eggnog with the nutmeg to taste.
Beat the egg whites until stiff, gradually adding in 3/4 cup sugar. Set aside. Beat the egg yolks until they are thick and pale and add the other 3/4 cup sugar and stir in rye whiskey. Blend well. Fold the egg white mixture into the yolk mixture and add the brandy and the rum. Beat the mixture well. To serve, fold the lightly whipped heavy cream into the eggnog. (If a thinner mixture is desired, add the heavy cream unwhipped.) Sprinkle the top of the eggnog with the nutmeg to taste.


./tmp/datasets/recipes_raw_nosource_fn.json
===========================================
Number of examples:  60039

Example object keys:
 [&#x27;ingredients&#x27;, &#x27;instructions&#x27;, &#x27;picture_link&#x27;, &#x27;title&#x27;]

Example object:
 {&#x27;instructions&#x27;: &#x27;Toss ingredients lightly and spoon into a buttered baking dish. Top with additional crushed cracker crumbs, and brush with melted butter. Bake in a preheated at 350 degrees oven for 25 to 30 minutes or until delicately browned.&#x27;, &#x27;ingredients&#x27;: [&#x27;1/2 cup celery, finely chopped&#x27;, &#x27;1 small green pepper finely chopped&#x27;, &#x27;1/2 cup finely sliced green onions&#x27;, &#x27;1/4 cup chopped parsley&#x27;, &#x27;1 pound crabmeat&#x27;, &#x27;1 1/4 cups coarsely crushed cracker crumbs&#x27;, &#x27;1/2 teaspoon salt&#x27;, &#x27;3/4 teaspoons dry mustard&#x27;, &#x27;Dash hot sauce&#x27;, &#x27;1/4 cup heavy cream&#x27;, &#x27;1/2 cup melted butter&#x27;], &#x27;title&#x27;: &quot;Grammie Hamblet&#x27;s Deviled Crab&quot;, &#x27;picture_link&#x27;: None}

Required keys:

  title:  Grammie Hamblet&#x27;s Deviled Crab

  ingredients:  [&#x27;1/2 cup celery, finely chopped&#x27;, &#x27;1 small green pepper finely chopped&#x27;, &#x27;1/2 cup finely sliced green onions&#x27;, &#x27;1/4 cup chopped parsley&#x27;, &#x27;1 pound crabmeat&#x27;, &#x27;1 1/4 cups coarsely crushed cracker crumbs&#x27;, &#x27;1/2 teaspoon salt&#x27;, &#x27;3/4 teaspoons dry mustard&#x27;, &#x27;Dash hot sauce&#x27;, &#x27;1/4 cup heavy cream&#x27;, &#x27;1/2 cup melted butter&#x27;]

  instructions:  Toss ingredients lightly and spoon into a buttered baking dish. Top with additional crushed cracker crumbs, and brush with melted butter. Bake in a preheated at 350 degrees oven for 25 to 30 minutes or until delicately browned.</code></pre></div></blockquote><p>Let&#x27;s count the total number of examples after we merged the files:</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python"><span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;Total number of raw examples: &#x27;</span><span class="token punctuation">,</span> <span class="token builtin">len</span><span class="token punctuation">(</span>dataset_raw<span class="token punctuation">)</span><span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">Total number of raw examples:  125164</code></pre></div></blockquote><h2 id="preprocessing-the-dataset" style="position:relative">Preprocessing the dataset<a href="#preprocessing-the-dataset" aria-label="preprocessing the dataset permalink" class="gatsby-remark-autolink-header-anchor after"><svg aria-hidden="true" focusable="false" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a></h2><h3 id="filtering-out-incomplete-examples" style="position:relative">Filtering out incomplete examples<a href="#filtering-out-incomplete-examples" aria-label="filtering out incomplete examples permalink" class="gatsby-remark-autolink-header-anchor after"><svg aria-hidden="true" focusable="false" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a></h3><p>It is possible that some recipes don&#x27;t have some required fields (<em>name</em>, <em>ingredients</em> or <em>instructions</em>). We need to clean our dataset from those incomplete examples.</p><p>The following function will help us filter out recipes which don&#x27;t have either title or ingredients or instructions:</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python"><span class="token keyword">def</span> <span class="token function">recipe_validate_required_fields</span><span class="token punctuation">(</span>recipe<span class="token punctuation">)</span><span class="token punctuation">:</span>
    required_keys <span class="token operator">=</span> <span class="token punctuation">[</span><span class="token string">&#x27;title&#x27;</span><span class="token punctuation">,</span> <span class="token string">&#x27;ingredients&#x27;</span><span class="token punctuation">,</span> <span class="token string">&#x27;instructions&#x27;</span><span class="token punctuation">]</span>

    <span class="token keyword">if</span> <span class="token keyword">not</span> recipe<span class="token punctuation">:</span>
        <span class="token keyword">return</span> <span class="token boolean">False</span>

    <span class="token keyword">for</span> required_key <span class="token keyword">in</span> required_keys<span class="token punctuation">:</span>
        <span class="token keyword">if</span> <span class="token keyword">not</span> recipe<span class="token punctuation">[</span>required_key<span class="token punctuation">]</span><span class="token punctuation">:</span>
            <span class="token keyword">return</span> <span class="token boolean">False</span>

        <span class="token keyword">if</span> <span class="token builtin">type</span><span class="token punctuation">(</span>recipe<span class="token punctuation">[</span>required_key<span class="token punctuation">]</span><span class="token punctuation">)</span> <span class="token operator">==</span> <span class="token builtin">list</span> <span class="token keyword">and</span> <span class="token builtin">len</span><span class="token punctuation">(</span>recipe<span class="token punctuation">[</span>required_key<span class="token punctuation">]</span><span class="token punctuation">)</span> <span class="token operator">==</span> <span class="token number">0</span><span class="token punctuation">:</span>
            <span class="token keyword">return</span> <span class="token boolean">False</span>

    <span class="token keyword">return</span> <span class="token boolean">True</span></code></pre></div><p>Let&#x27;s do the filtering now using <code class="language-text">recipe_validate_required_fields()</code> function:</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python">dataset_validated <span class="token operator">=</span> <span class="token punctuation">[</span>recipe <span class="token keyword">for</span> recipe <span class="token keyword">in</span> dataset_raw <span class="token keyword">if</span> recipe_validate_required_fields<span class="token punctuation">(</span>recipe<span class="token punctuation">)</span><span class="token punctuation">]</span>

<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;Dataset size BEFORE validation&#x27;</span><span class="token punctuation">,</span> <span class="token builtin">len</span><span class="token punctuation">(</span>dataset_raw<span class="token punctuation">)</span><span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;Dataset size AFTER validation&#x27;</span><span class="token punctuation">,</span> <span class="token builtin">len</span><span class="token punctuation">(</span>dataset_validated<span class="token punctuation">)</span><span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;Number of incomplete recipes&#x27;</span><span class="token punctuation">,</span> <span class="token builtin">len</span><span class="token punctuation">(</span>dataset_raw<span class="token punctuation">)</span> <span class="token operator">-</span> <span class="token builtin">len</span><span class="token punctuation">(</span>dataset_validated<span class="token punctuation">)</span><span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">Dataset size BEFORE validation 125164
Dataset size AFTER validation 122938
Number of incomplete recipes 2226</code></pre></div></blockquote><p>As you may see among <code class="language-text">125164</code> recipes we had <code class="language-text">2226</code> somehow incomplete.</p><h3 id="converting-recipes-objects-into-strings" style="position:relative">Converting recipes objects into strings<a href="#converting-recipes-objects-into-strings" aria-label="converting recipes objects into strings permalink" class="gatsby-remark-autolink-header-anchor after"><svg aria-hidden="true" focusable="false" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a></h3><p>RNN doesn&#x27;t understand objects. Therefore, we need to convert recipes objects to string and then to numbers (indices). Let&#x27;s start with converting recipes objects to strings.</p><p>To help our RNN learn the structure of the text faster let&#x27;s add 3 &quot;landmarks&quot; to it. We will use these unique &quot;title&quot;, &quot;ingredients&quot; and &quot;instruction&quot; landmarks to separate the logic sections of each recipe.</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python">STOP_WORD_TITLE <span class="token operator">=</span> <span class="token string">&#x27;📗 &#x27;</span>
STOP_WORD_INGREDIENTS <span class="token operator">=</span> <span class="token string">&#x27;\n🥕\n\n&#x27;</span>
STOP_WORD_INSTRUCTIONS <span class="token operator">=</span> <span class="token string">&#x27;\n📝\n\n&#x27;</span></code></pre></div><p>The following function converts the recipe object to a string (sequence of characters) for later usage in RNN input.</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python"><span class="token keyword">def</span> <span class="token function">recipe_to_string</span><span class="token punctuation">(</span>recipe<span class="token punctuation">)</span><span class="token punctuation">:</span>
    <span class="token comment"># This string is presented as a part of recipes so we need to clean it up.</span>
    noize_string <span class="token operator">=</span> <span class="token string">&#x27;ADVERTISEMENT&#x27;</span>

    title <span class="token operator">=</span> recipe<span class="token punctuation">[</span><span class="token string">&#x27;title&#x27;</span><span class="token punctuation">]</span>
    ingredients <span class="token operator">=</span> recipe<span class="token punctuation">[</span><span class="token string">&#x27;ingredients&#x27;</span><span class="token punctuation">]</span>
    instructions <span class="token operator">=</span> recipe<span class="token punctuation">[</span><span class="token string">&#x27;instructions&#x27;</span><span class="token punctuation">]</span><span class="token punctuation">.</span>split<span class="token punctuation">(</span><span class="token string">&#x27;\n&#x27;</span><span class="token punctuation">)</span>

    ingredients_string <span class="token operator">=</span> <span class="token string">&#x27;&#x27;</span>
    <span class="token keyword">for</span> ingredient <span class="token keyword">in</span> ingredients<span class="token punctuation">:</span>
        ingredient <span class="token operator">=</span> ingredient<span class="token punctuation">.</span>replace<span class="token punctuation">(</span>noize_string<span class="token punctuation">,</span> <span class="token string">&#x27;&#x27;</span><span class="token punctuation">)</span>
        <span class="token keyword">if</span> ingredient<span class="token punctuation">:</span>
            ingredients_string <span class="token operator">+=</span> <span class="token string-interpolation"><span class="token string">f&#x27;• </span><span class="token interpolation"><span class="token punctuation">{</span>ingredient<span class="token punctuation">}</span></span><span class="token string">\n&#x27;</span></span>

    instructions_string <span class="token operator">=</span> <span class="token string">&#x27;&#x27;</span>
    <span class="token keyword">for</span> instruction <span class="token keyword">in</span> instructions<span class="token punctuation">:</span>
        instruction <span class="token operator">=</span> instruction<span class="token punctuation">.</span>replace<span class="token punctuation">(</span>noize_string<span class="token punctuation">,</span> <span class="token string">&#x27;&#x27;</span><span class="token punctuation">)</span>
        <span class="token keyword">if</span> instruction<span class="token punctuation">:</span>
            instructions_string <span class="token operator">+=</span> <span class="token string-interpolation"><span class="token string">f&#x27;▪︎ </span><span class="token interpolation"><span class="token punctuation">{</span>instruction<span class="token punctuation">}</span></span><span class="token string">\n&#x27;</span></span>

    <span class="token keyword">return</span> <span class="token string-interpolation"><span class="token string">f&#x27;</span><span class="token interpolation"><span class="token punctuation">{</span>STOP_WORD_TITLE<span class="token punctuation">}</span></span><span class="token interpolation"><span class="token punctuation">{</span>title<span class="token punctuation">}</span></span><span class="token string">\n</span><span class="token interpolation"><span class="token punctuation">{</span>STOP_WORD_INGREDIENTS<span class="token punctuation">}</span></span><span class="token interpolation"><span class="token punctuation">{</span>ingredients_string<span class="token punctuation">}</span></span><span class="token interpolation"><span class="token punctuation">{</span>STOP_WORD_INSTRUCTIONS<span class="token punctuation">}</span></span><span class="token interpolation"><span class="token punctuation">{</span>instructions_string<span class="token punctuation">}</span></span><span class="token string">&#x27;</span></span></code></pre></div><p>Let&#x27;s apply <code class="language-text">recipe_to_string()</code> function to <code class="language-text">dataset_validated</code>:</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python">dataset_stringified <span class="token operator">=</span> <span class="token punctuation">[</span>recipe_to_string<span class="token punctuation">(</span>recipe<span class="token punctuation">)</span> <span class="token keyword">for</span> recipe <span class="token keyword">in</span> dataset_validated<span class="token punctuation">]</span>

<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;Stringified dataset size: &#x27;</span><span class="token punctuation">,</span> <span class="token builtin">len</span><span class="token punctuation">(</span>dataset_stringified<span class="token punctuation">)</span><span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">Stringified dataset size:  122938</code></pre></div></blockquote><p>Let&#x27;s preview first several recipes:</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python"><span class="token keyword">for</span> recipe_index<span class="token punctuation">,</span> recipe_string <span class="token keyword">in</span> <span class="token builtin">enumerate</span><span class="token punctuation">(</span>dataset_stringified<span class="token punctuation">[</span><span class="token punctuation">:</span><span class="token number">3</span><span class="token punctuation">]</span><span class="token punctuation">)</span><span class="token punctuation">:</span>
    <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;Recipe #{}\n---------&#x27;</span><span class="token punctuation">.</span><span class="token builtin">format</span><span class="token punctuation">(</span>recipe_index <span class="token operator">+</span> <span class="token number">1</span><span class="token punctuation">)</span><span class="token punctuation">)</span>
    <span class="token keyword">print</span><span class="token punctuation">(</span>recipe_string<span class="token punctuation">)</span>
    <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;\n&#x27;</span><span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">Recipe #1
---------
📗 Slow Cooker Chicken and Dumplings

🥕

• 4 skinless, boneless chicken breast halves
• 2 tablespoons butter
• 2 (10.75 ounce) cans condensed cream of chicken soup
• 1 onion, finely diced
• 2 (10 ounce) packages refrigerated biscuit dough, torn into pieces

📝

▪︎ Place the chicken, butter, soup, and onion in a slow cooker, and fill with enough water to cover.
▪︎ Cover, and cook for 5 to 6 hours on High. About 30 minutes before serving, place the torn biscuit dough in the slow cooker. Cook until the dough is no longer raw in the center.


Recipe #2
---------
📗 Awesome Slow Cooker Pot Roast

🥕

• 2 (10.75 ounce) cans condensed cream of mushroom soup
• 1 (1 ounce) package dry onion soup mix
• 1 1/4 cups water
• 5 1/2 pounds pot roast

📝

▪︎ In a slow cooker, mix cream of mushroom soup, dry onion soup mix and water. Place pot roast in slow cooker and coat with soup mixture.
▪︎ Cook on High setting for 3 to 4 hours, or on Low setting for 8 to 9 hours.


Recipe #3
---------
📗 Brown Sugar Meatloaf

🥕

• 1/2 cup packed brown sugar
• 1/2 cup ketchup
• 1 1/2 pounds lean ground beef
• 3/4 cup milk
• 2 eggs
• 1 1/2 teaspoons salt
• 1/4 teaspoon ground black pepper
• 1 small onion, chopped
• 1/4 teaspoon ground ginger
• 3/4 cup finely crushed saltine cracker crumbs

📝

▪︎ Preheat oven to 350 degrees F (175 degrees C). Lightly grease a 5x9 inch loaf pan.
▪︎ Press the brown sugar in the bottom of the prepared loaf pan and spread the ketchup over the sugar.
▪︎ In a mixing bowl, mix thoroughly all remaining ingredients and shape into a loaf. Place on top of the ketchup.
▪︎ Bake in preheated oven for 1 hour or until juices are clear.</code></pre></div></blockquote><p>Just out of curiosity let&#x27;s preview the recipe somewhere from the middle of the dataset to see that it has expected data structure:</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python"><span class="token keyword">print</span><span class="token punctuation">(</span>dataset_stringified<span class="token punctuation">[</span><span class="token number">50000</span><span class="token punctuation">]</span><span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">📗 Herbed Bean Ragoût

🥕

• 6 ounces haricots verts (French thin green beans), trimmed and halved crosswise
• 1 (1-pound) bag frozen edamame (soybeans in the pod) or 1 1/4 cups frozen shelled edamame, not thawed
• 2/3 cup finely chopped onion
• 2 garlic cloves, minced
• 1 Turkish bay leaf or 1/2 California bay leaf
• 2 (3-inch) fresh rosemary sprigs
• 1/2 teaspoon salt
• 1/4 teaspoon black pepper
• 1 tablespoon olive oil
• 1 medium carrot, cut into 1/8-inch dice
• 1 medium celery rib, cut into 1/8-inch dice
• 1 (15- to 16-ounces) can small white beans, rinsed and drained
• 1 1/2 cups chicken stock or low-sodium broth
• 2 tablespoons unsalted butter
• 2 tablespoons finely chopped fresh flat-leaf parsley
• 1 tablespoon finely chopped fresh chervil (optional)
• Garnish: fresh chervil sprigs

📝

▪︎ Cook haricots verts in a large pot of boiling salted water until just tender, 3 to 4 minutes. Transfer with a slotted spoon to a bowl of ice and cold water, then drain. Add edamame to boiling water and cook 4 minutes. Drain in a colander, then rinse under cold water. If using edamame in pods, shell them and discard pods. Cook onion, garlic, bay leaf, rosemary, salt, and pepper in oil in a 2- to 4-quart heavy saucepan over moderately low heat, stirring, until softened, about 3 minutes. Add carrot and celery and cook, stirring, until softened, about 3 minutes. Add white beans and stock and simmer, covered, stirring occasionally, 10 minutes. Add haricots verts and edamame and simmer, uncovered, until heated through, 2 to 3 minutes. Add butter, parsley, and chervil (if using) and stir gently until butter is melted. Discard bay leaf and rosemary sprigs.
▪︎ Cook haricots verts in a large pot of boiling salted water until just tender, 3 to 4 minutes. Transfer with a slotted spoon to a bowl of ice and cold water, then drain.
▪︎ Add edamame to boiling water and cook 4 minutes. Drain in a colander, then rinse under cold water. If using edamame in pods, shell them and discard pods.
▪︎ Cook onion, garlic, bay leaf, rosemary, salt, and pepper in oil in a 2- to 4-quart heavy saucepan over moderately low heat, stirring, until softened, about 3 minutes. Add carrot and celery and cook, stirring, until softened, about 3 minutes.
▪︎ Add white beans and stock and simmer, covered, stirring occasionally, 10 minutes. Add haricots verts and edamame and simmer, uncovered, until heated through, 2 to 3 minutes. Add butter, parsley, and chervil (if using) and stir gently until butter is melted. Discard bay leaf and rosemary sprigs.</code></pre></div></blockquote><h3 id="filtering-out-large-recipes" style="position:relative">Filtering out large recipes<a href="#filtering-out-large-recipes" aria-label="filtering out large recipes permalink" class="gatsby-remark-autolink-header-anchor after"><svg aria-hidden="true" focusable="false" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a></h3><p>Recipes have different lengths. We need to have one <em>hard-coded sequence length</em> limit before feeding recipe sequences to RNN. We need to find out what recipe length will cover most of the recipe use-cases and at the same time we want to keep it as small as possible to speed up the training process.</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python">recipes_lengths <span class="token operator">=</span> <span class="token punctuation">[</span><span class="token punctuation">]</span>
<span class="token keyword">for</span> recipe_text <span class="token keyword">in</span> dataset_stringified<span class="token punctuation">:</span>
    recipes_lengths<span class="token punctuation">.</span>append<span class="token punctuation">(</span><span class="token builtin">len</span><span class="token punctuation">(</span>recipe_text<span class="token punctuation">)</span><span class="token punctuation">)</span>

plt<span class="token punctuation">.</span>hist<span class="token punctuation">(</span>recipes_lengths<span class="token punctuation">,</span> bins<span class="token operator">=</span><span class="token number">50</span><span class="token punctuation">)</span>
plt<span class="token punctuation">.</span>show<span class="token punctuation">(</span><span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><p><span class="gatsby-resp-image-wrapper" style="position:relative;display:block;margin-left:auto;margin-right:auto;max-width:388px">
      <span class="gatsby-resp-image-background-image" style="padding-bottom:64%;position:relative;bottom:0;left:0;background-image:url(&#x27;data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAANCAYAAACpUE5eAAAACXBIWXMAAAsSAAALEgHS3X78AAABxUlEQVQ4y62TT0sbQRjGX2nTXioqYiAVK3jSfoJS8OwX8FBo68UeBGm9+QFaiHoQ7UEPXgt6EC2pRdxoDTEErTViD4U1iK66iTu72ZW6mY0SZ155U41/kGLUF16e2X1mfsyzMwuIWJYyGOipVP82c365rjufzWajpbTruhHO+bJpmkMQ7O3zISJknP3YQtLCu5TneatQ4/fXAUC1YWaUiMpQCHkkpTwusY8IyDmPAdVcbAlM2wlHVBNpAplSyhvv7GwN5zwOgUAgAAAVhpWZvRfgm7dtj+gbeu5BeDiygbrjFUxxW2Bj0/MKAHjA3b/hT9//4KaVvRvw6bOGKgB4vL6zN/tqZBHZQa4IvGnsS8DW9ve+PCJspa1wy0AUf27aZEohZCl1DqysrqmlQ9k1rJmXPT9wIrF7iIj5vBB5UillUf8zPixeG/pTlEQSdJaJvwjOYceXlUIMIShyMdLFeNdFLmgul1srAOkuLiV+B5s/flP8nWPjn0OLM0ktFdozWGhb0xTbtr/quj5N7TjOpKZpCmNsyrKskPbPn0in0/Oqqg4SEN51dZeRvl5GqP8wSvwn9EwNAJWn6qPDu/juiv8QAMpPALB1cC4MyhsSAAAAAElFTkSuQmCC&#x27;);background-size:cover;display:block"></span>
  <img class="gatsby-resp-image-image" alt="Recipes lengths 1" title="Recipes lengths 1" src="/static/42704f5abc08216d5d26d0c9347b62e2/96c67/2.png" srcSet="/static/42704f5abc08216d5d26d0c9347b62e2/63868/2.png 250w,/static/42704f5abc08216d5d26d0c9347b62e2/96c67/2.png 388w" sizes="(max-width: 388px) 100vw, 388px" style="width:100%;height:100%;margin:0;vertical-align:middle;position:absolute;top:0;left:0" loading="lazy"/>
    </span></p><p>Most of the recipes have length less than <code class="language-text">5000</code> characters. Let&#x27;s zoom in to see more detailed picture:</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python">plt<span class="token punctuation">.</span>hist<span class="token punctuation">(</span>recipes_lengths<span class="token punctuation">,</span> <span class="token builtin">range</span><span class="token operator">=</span><span class="token punctuation">(</span><span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">8000</span><span class="token punctuation">)</span><span class="token punctuation">,</span> bins<span class="token operator">=</span><span class="token number">50</span><span class="token punctuation">)</span>
plt<span class="token punctuation">.</span>show<span class="token punctuation">(</span><span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><p><span class="gatsby-resp-image-wrapper" style="position:relative;display:block;margin-left:auto;margin-right:auto;max-width:387px">
      <span class="gatsby-resp-image-background-image" style="padding-bottom:64.8%;position:relative;bottom:0;left:0;background-image:url(&#x27;data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAANCAYAAACpUE5eAAAACXBIWXMAAAsSAAALEgHS3X78AAACN0lEQVQ4y42Tz2sTQRTHn6IixUbbNBYFqyieNGD+ACGw5CCYayAgeBAJKJpaxJ9Q9ealhQrqTS1JTMVCGtRWRaNkRekPW0EPahpYk2LSze42G9MmZnfmyUxTNiq0GfjyZob3PvN9uzPgcDgcANCeSqWOqap6dzab65MLSr+qqk1LUZQ+TdNuS5J0CgRBsCEizMtyHBHxTuI7pudLbIqmaSIhZE2xPEopFovFKXC5XO0AsKGgKIMMcvzeeCU6kakhYs0klMcmVGG1pVLpJTidzg4G1Be0ULVm4sFrz43e+BfKHRLKT25CJsvXdT0BbrfbBgCwoKnhmknw0I0XxpGBJAc2Fq02/gIKgtAKAC2/ilokr1e4w64LT+i3/PJ3JGR12H9An8/XAgDraEWPfEgr2NE9YnT2xGl0/Af+23ZTQL/fz4CA1VKYAbf3xA17cISeG5rhyQYhaLXfBNDj8fCW6ZIemZI07pC1vP/qKK60bZiNUMttQ7SAgUBgM3NIl/RQbHoO287GqvsuPzPtwZh54sEES+TiRZTySOqRWuvfdeAr8Hq9WwBgI9YWh2+OfUXbmRjuvTKGey6N4o7zT/H0wxmclRdx7V+DWC6X3wN7JUz5TPp67+PJSVsg/GZ399C7XcFHya5gVGw9GUoeuDgsBgdF8X7is/j6U1qcTs0l09m8mM3JycxPWczJhbeqpn2UJOkWhx0VDq8HgE2wPJhjdtnZ/bTvBGgDS+xVbatrJYftbwWATrb/B8ikG2zEbqVHAAAAAElFTkSuQmCC&#x27;);background-size:cover;display:block"></span>
  <img class="gatsby-resp-image-image" alt="Recipes lengths 2" title="Recipes lengths 2" src="/static/3cc69385b2215b463ccdb324f0724a68/691c3/3.png" srcSet="/static/3cc69385b2215b463ccdb324f0724a68/63868/3.png 250w,/static/3cc69385b2215b463ccdb324f0724a68/691c3/3.png 387w" sizes="(max-width: 387px) 100vw, 387px" style="width:100%;height:100%;margin:0;vertical-align:middle;position:absolute;top:0;left:0" loading="lazy"/>
    </span></p><p>Looks like a limit of <code class="language-text">2000</code> characters for the recipes will cover most of the cases. We may try to train RNN with this maximum recipe length limit.</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python">MAX_RECIPE_LENGTH <span class="token operator">=</span> <span class="token number">2000</span></code></pre></div><p>Therefore, let&#x27;s filter out all the recipes that are longer than <code class="language-text">MAX_RECIPE_LENGTH</code>:</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python"><span class="token keyword">def</span> <span class="token function">filter_recipes_by_length</span><span class="token punctuation">(</span>recipe_test<span class="token punctuation">)</span><span class="token punctuation">:</span>
    <span class="token keyword">return</span> <span class="token builtin">len</span><span class="token punctuation">(</span>recipe_test<span class="token punctuation">)</span> <span class="token operator">&lt;=</span> MAX_RECIPE_LENGTH

dataset_filtered <span class="token operator">=</span> <span class="token punctuation">[</span>recipe_text <span class="token keyword">for</span> recipe_text <span class="token keyword">in</span> dataset_stringified <span class="token keyword">if</span> filter_recipes_by_length<span class="token punctuation">(</span>recipe_text<span class="token punctuation">)</span><span class="token punctuation">]</span>

<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;Dataset size BEFORE filtering: &#x27;</span><span class="token punctuation">,</span> <span class="token builtin">len</span><span class="token punctuation">(</span>dataset_stringified<span class="token punctuation">)</span><span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;Dataset size AFTER filtering: &#x27;</span><span class="token punctuation">,</span> <span class="token builtin">len</span><span class="token punctuation">(</span>dataset_filtered<span class="token punctuation">)</span><span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;Number of eliminated recipes: &#x27;</span><span class="token punctuation">,</span> <span class="token builtin">len</span><span class="token punctuation">(</span>dataset_stringified<span class="token punctuation">)</span> <span class="token operator">-</span> <span class="token builtin">len</span><span class="token punctuation">(</span>dataset_filtered<span class="token punctuation">)</span><span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">Dataset size BEFORE filtering:  122938
Dataset size AFTER filtering:  100212
Number of eliminated recipes:  22726</code></pre></div></blockquote><p>We lost <code class="language-text">22726</code> recipes during this filtering but now recipes&#x27; data is more dense.</p><h3 id="summarizing-dataset-parameters" style="position:relative">Summarizing dataset parameters<a href="#summarizing-dataset-parameters" aria-label="summarizing dataset parameters permalink" class="gatsby-remark-autolink-header-anchor after"><svg aria-hidden="true" focusable="false" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a></h3><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python">TOTAL_RECIPES_NUM <span class="token operator">=</span> <span class="token builtin">len</span><span class="token punctuation">(</span>dataset_filtered<span class="token punctuation">)</span>

<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;MAX_RECIPE_LENGTH: &#x27;</span><span class="token punctuation">,</span> MAX_RECIPE_LENGTH<span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;TOTAL_RECIPES_NUM: &#x27;</span><span class="token punctuation">,</span> TOTAL_RECIPES_NUM<span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">MAX_RECIPE_LENGTH:  2000
TOTAL_RECIPES_NUM:  100212</code></pre></div></blockquote><p>Finally, we ended up with <code class="language-text">~100k</code> recipes. Each recipe has <code class="language-text">2000</code> characters length.</p><h2 id="creating-vocabulary" style="position:relative">Creating vocabulary<a href="#creating-vocabulary" aria-label="creating vocabulary permalink" class="gatsby-remark-autolink-header-anchor after"><svg aria-hidden="true" focusable="false" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a></h2><p>Recurrent neural network doesn&#x27;t understand characters or words. It understands numbers instead. Therefore, we need to convert recipe texts to numbers.</p><p>In this experiment we&#x27;re going to use a <strong>character-level</strong> language model based on multi-layer LSTM (Long Short-Term Memory) network (as opposed to <strong>word-level</strong> language model). It means that instead of creating unique indices for words we will create unique indices for characters. By doing that we let the network predict the next <em>character</em> instead of the next <em>word</em> in a sequence.</p><p>ℹ️ You may find more details about character-level RNNs explanation in the <a href="http://karpathy.github.io/2015/05/21/rnn-effectiveness/">Unreasonable Effectiveness of Recurrent Neural Networks</a> article by <em>Andrej Karpathy</em>:</p><p>To create a vocabulary out of recipes texts we will use <a href="https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/text/Tokenizer">tf.keras.preprocessing.text.Tokenizer</a>.</p><p>We also need to come with some unique character that will be treated as a <em>stop-character</em> and will indicate the end of a recipe. We need it for recipe generation afterwards since without this stop-character we won&#x27;t know where the end of a recipe that we&#x27;re generating is.</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python">STOP_SIGN <span class="token operator">=</span> <span class="token string">&#x27;␣&#x27;</span>

tokenizer <span class="token operator">=</span> tf<span class="token punctuation">.</span>keras<span class="token punctuation">.</span>preprocessing<span class="token punctuation">.</span>text<span class="token punctuation">.</span>Tokenizer<span class="token punctuation">(</span>
    char_level<span class="token operator">=</span><span class="token boolean">True</span><span class="token punctuation">,</span>
    filters<span class="token operator">=</span><span class="token string">&#x27;&#x27;</span><span class="token punctuation">,</span>
    lower<span class="token operator">=</span><span class="token boolean">False</span><span class="token punctuation">,</span>
    split<span class="token operator">=</span><span class="token string">&#x27;&#x27;</span>
<span class="token punctuation">)</span>

<span class="token comment"># Stop word is not a part of recipes, but tokenizer must know about it as well.</span>
tokenizer<span class="token punctuation">.</span>fit_on_texts<span class="token punctuation">(</span><span class="token punctuation">[</span>STOP_SIGN<span class="token punctuation">]</span><span class="token punctuation">)</span>

tokenizer<span class="token punctuation">.</span>fit_on_texts<span class="token punctuation">(</span>dataset_filtered<span class="token punctuation">)</span>

tokenizer<span class="token punctuation">.</span>get_config<span class="token punctuation">(</span><span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">{&#x27;num_words&#x27;: None,
 &#x27;filters&#x27;: &#x27;&#x27;,
 &#x27;lower&#x27;: False,
 &#x27;split&#x27;: &#x27;&#x27;,
 &#x27;char_level&#x27;: True,
 &#x27;oov_token&#x27;: None,
 &#x27;document_count&#x27;: 100213,

 &#x27;word_counts&#x27;: &#x27;{&quot;\\u2423&quot;: 1, &quot;\\ud83d\\udcd7&quot;: 100212, &quot; &quot;: 17527888, &quot;S&quot;: 270259, &quot;l&quot;: 3815150, &quot;o&quot;: 5987496, &quot;w&quot;: 964459, &quot;C&quot;: 222831, &quot;k&quot;: 890982, &quot;e&quot;: 9296022, &quot;r&quot;: 4760887, &quot;h&quot;: 2922100, &quot;i&quot;: 4911812, &quot;c&quot;: 2883507, &quot;n&quot;: 5304396, &quot;a&quot;: 6067157, &quot;d&quot;: 3099679, &quot;D&quot;: 63999, &quot;u&quot;: 2717050, &quot;m&quot;: 1794411, &quot;p&quot;: 2679164, &quot;g&quot;: 1698670, &quot;s&quot;: 4704222, &quot;\\n&quot;: 1955281, &quot;\\ud83e\\udd55&quot;: 100212, &quot;\\u2022&quot;: 922813, &quot;4&quot;: 232607, &quot;,&quot;: 1130487, &quot;b&quot;: 1394803, &quot;t&quot;: 5997722, &quot;v&quot;: 746785, &quot;2&quot;: 493933, &quot;(&quot;: 144985, &quot;1&quot;: 853931, &quot;0&quot;: 145119, &quot;.&quot;: 1052548, &quot;7&quot;: 31098, &quot;5&quot;: 154071, &quot;)&quot;: 144977, &quot;f&quot;: 1042981, &quot;y&quot;: 666553, &quot;\\ud83d\\udcdd&quot;: 100212, &quot;\\u25aa&quot;: 331058, &quot;\\ufe0e&quot;: 331058, &quot;P&quot;: 200597, &quot;6&quot;: 51398, &quot;H&quot;: 43936, &quot;A&quot;: 134274, &quot;3&quot;: 213519, &quot;R&quot;: 101253, &quot;x&quot;: 201286, &quot;/&quot;: 345257, &quot;I&quot;: 81591, &quot;L&quot;: 46138, &quot;8&quot;: 55352, &quot;9&quot;: 17697, &quot;B&quot;: 123813, &quot;M&quot;: 78684, &quot;F&quot;: 104359, &quot;j&quot;: 110008, &quot;-&quot;: 219160, &quot;W&quot;: 61616, &quot;\\u00ae&quot;: 10159, &quot;N&quot;: 12808, &quot;q&quot;: 69654, &quot;T&quot;: 101371, &quot;;&quot;: 72045, &quot;\&#x27;&quot;: 26831, &quot;Z&quot;: 2428, &quot;z&quot;: 115883, &quot;G&quot;: 52043, &quot;:&quot;: 31318, &quot;E&quot;: 18582, &quot;K&quot;: 18421, &quot;X&quot;: 385, &quot;\\&quot;&quot;: 6445, &quot;O&quot;: 28971, &quot;Y&quot;: 6064, &quot;\\u2122&quot;: 538, &quot;Q&quot;: 3904, &quot;J&quot;: 10269, &quot;!&quot;: 3014, &quot;U&quot;: 14132, &quot;V&quot;: 12172, &quot;&amp;&quot;: 1039, &quot;+&quot;: 87, &quot;=&quot;: 113, &quot;%&quot;: 993, &quot;*&quot;: 3243, &quot;\\u00a9&quot;: 99, &quot;[&quot;: 30, &quot;]&quot;: 31, &quot;\\u00e9&quot;: 6727, &quot;&lt;&quot;: 76, &quot;&gt;&quot;: 86, &quot;\\u00bd&quot;: 166, &quot;#&quot;: 168, &quot;\\u00f1&quot;: 891, &quot;?&quot;: 327, &quot;\\u2019&quot;: 111, &quot;\\u00b0&quot;: 6808, &quot;\\u201d&quot;: 6, &quot;$&quot;: 84, &quot;@&quot;: 5, &quot;{&quot;: 8, &quot;}&quot;: 9, &quot;\\u2013&quot;: 1228, &quot;\\u0096&quot;: 7, &quot;\\u00e0&quot;: 26, &quot;\\u00e2&quot;: 106, &quot;\\u00e8&quot;: 846, &quot;\\u00e1&quot;: 74, &quot;\\u2014&quot;: 215, &quot;\\u2044&quot;: 16, &quot;\\u00ee&quot;: 415, &quot;\\u00e7&quot;: 171, &quot;_&quot;: 26, &quot;\\u00fa&quot;: 48, &quot;\\u00ef&quot;: 43, &quot;\\u201a&quot;: 20, &quot;\\u00fb&quot;: 36, &quot;\\u00f3&quot;: 74, &quot;\\u00ed&quot;: 130, &quot;\\u25ca&quot;: 4, &quot;\\u00f9&quot;: 12, &quot;\\u00d7&quot;: 6, &quot;\\u00ec&quot;: 8, &quot;\\u00fc&quot;: 29, &quot;\\u2031&quot;: 4, &quot;\\u00ba&quot;: 19, &quot;\\u201c&quot;: 4, &quot;\\u00ad&quot;: 25, &quot;\\u00ea&quot;: 27, &quot;\\u00f6&quot;: 9, &quot;\\u0301&quot;: 11, &quot;\\u00f4&quot;: 8, &quot;\\u00c1&quot;: 2, &quot;\\u00be&quot;: 23, &quot;\\u00bc&quot;: 95, &quot;\\u00eb&quot;: 2, &quot;\\u0097&quot;: 2, &quot;\\u215b&quot;: 3, &quot;\\u2027&quot;: 4, &quot;\\u00e4&quot;: 15, &quot;\\u001a&quot;: 2, &quot;\\u00f8&quot;: 2, &quot;\\ufffd&quot;: 20, &quot;\\u02da&quot;: 6, &quot;\\u00bf&quot;: 264, &quot;\\u2153&quot;: 2, &quot;|&quot;: 2, &quot;\\u00e5&quot;: 3, &quot;\\u00a4&quot;: 1, &quot;\\u201f&quot;: 1, &quot;\\u00a7&quot;: 5, &quot;\\ufb02&quot;: 3, &quot;\\u00a0&quot;: 1, &quot;\\u01b0&quot;: 2, &quot;\\u01a1&quot;: 1, &quot;\\u0103&quot;: 1, &quot;\\u0300&quot;: 1, &quot;\\u00bb&quot;: 6, &quot;`&quot;: 3, &quot;\\u0092&quot;: 2, &quot;\\u215e&quot;: 1, &quot;\\u202d&quot;: 4, &quot;\\u00b4&quot;: 2, &quot;\\u2012&quot;: 2, &quot;\\u00c9&quot;: 40, &quot;\\u00da&quot;: 14, &quot;\\u20ac&quot;: 1, &quot;\\\\&quot;: 5, &quot;~&quot;: 1, &quot;\\u0095&quot;: 1, &quot;\\u00c2&quot;: 2}&#x27;,

 &#x27;word_docs&#x27;: &#x27;{&quot;\\u2423&quot;: 1, &quot;k&quot;: 97316, &quot;0&quot;: 61954, &quot;o&quot;: 100205, &quot;r&quot;: 100207, &quot;d&quot;: 100194, &quot;u&quot;: 100161, &quot;S&quot;: 89250, &quot;\\u25aa&quot;: 100212, &quot;D&quot;: 40870, &quot;1&quot;: 99320, &quot;g&quot;: 99975, &quot;n&quot;: 100198, &quot;b&quot;: 99702, &quot;t&quot;: 100202, &quot;.&quot;: 100163, &quot; &quot;: 100212, &quot;7&quot;: 24377, &quot;3&quot;: 79135, &quot;\\ud83d\\udcd7&quot;: 100212, &quot;i&quot;: 100207, &quot;5&quot;: 65486, &quot;f&quot;: 98331, &quot;c&quot;: 100190, &quot;4&quot;: 82453, &quot;a&quot;: 100205, &quot;2&quot;: 96743, &quot;v&quot;: 97848, &quot;C&quot;: 83328, &quot;s&quot;: 100204, &quot;\\n&quot;: 100212, &quot;6&quot;: 35206, &quot;\\ud83d\\udcdd&quot;: 100212, &quot;,&quot;: 98524, &quot;\\ufe0e&quot;: 100212, &quot;l&quot;: 100206, &quot;e&quot;: 100212, &quot;y&quot;: 96387, &quot;)&quot;: 67614, &quot;p&quot;: 100046, &quot;H&quot;: 31908, &quot;\\ud83e\\udd55&quot;: 100212, &quot;m&quot;: 99988, &quot;w&quot;: 99227, &quot;(&quot;: 67627, &quot;A&quot;: 60900, &quot;h&quot;: 100161, &quot;\\u2022&quot;: 100212, &quot;P&quot;: 79364, &quot;R&quot;: 54040, &quot;9&quot;: 14114, &quot;8&quot;: 37000, &quot;L&quot;: 32101, &quot;x&quot;: 72133, &quot;I&quot;: 46675, &quot;/&quot;: 89051, &quot;j&quot;: 47438, &quot;F&quot;: 57940, &quot;B&quot;: 64278, &quot;M&quot;: 48332, &quot;-&quot;: 74711, &quot;T&quot;: 53758, &quot;\\u00ae&quot;: 5819, &quot;N&quot;: 9981, &quot;W&quot;: 38981, &quot;q&quot;: 36538, &quot;;&quot;: 33863, &quot;G&quot;: 35355, &quot;\&#x27;&quot;: 18120, &quot;z&quot;: 42430, &quot;Z&quot;: 2184, &quot;:&quot;: 18214, &quot;E&quot;: 12161, &quot;K&quot;: 14834, &quot;X&quot;: 321, &quot;\\&quot;&quot;: 2617, &quot;O&quot;: 20103, &quot;Y&quot;: 5148, &quot;\\u2122&quot;: 448, &quot;Q&quot;: 3142, &quot;J&quot;: 8225, &quot;!&quot;: 2428, &quot;U&quot;: 10621, &quot;V&quot;: 9710, &quot;&amp;&quot;: 749, &quot;+&quot;: 32, &quot;=&quot;: 48, &quot;%&quot;: 717, &quot;*&quot;: 1780, &quot;\\u00a9&quot;: 91, &quot;]&quot;: 26, &quot;[&quot;: 25, &quot;\\u00e9&quot;: 2462, &quot;&gt;&quot;: 33, &quot;&lt;&quot;: 27, &quot;\\u00bd&quot;: 81, &quot;#&quot;: 139, &quot;\\u00f1&quot;: 423, &quot;?&quot;: 207, &quot;\\u2019&quot;: 64, &quot;\\u00b0&quot;: 3062, &quot;\\u201d&quot;: 3, &quot;@&quot;: 4, &quot;$&quot;: 49, &quot;{&quot;: 7, &quot;}&quot;: 8, &quot;\\u2013&quot;: 491, &quot;\\u0096&quot;: 7, &quot;\\u00e0&quot;: 22, &quot;\\u00e2&quot;: 45, &quot;\\u00e8&quot;: 335, &quot;\\u00e1&quot;: 38, &quot;\\u2014&quot;: 95, &quot;\\u2044&quot;: 9, &quot;\\u00ee&quot;: 122, &quot;\\u00e7&quot;: 120, &quot;_&quot;: 8, &quot;\\u00fa&quot;: 25, &quot;\\u00ef&quot;: 24, &quot;\\u201a&quot;: 10, &quot;\\u00fb&quot;: 29, &quot;\\u00f3&quot;: 40, &quot;\\u00ed&quot;: 52, &quot;\\u25ca&quot;: 2, &quot;\\u00f9&quot;: 6, &quot;\\u00d7&quot;: 4, &quot;\\u00ec&quot;: 4, &quot;\\u00fc&quot;: 19, &quot;\\u2031&quot;: 2, &quot;\\u00ba&quot;: 9, &quot;\\u201c&quot;: 2, &quot;\\u00ad&quot;: 11, &quot;\\u00ea&quot;: 4, &quot;\\u00f6&quot;: 4, &quot;\\u0301&quot;: 6, &quot;\\u00f4&quot;: 5, &quot;\\u00c1&quot;: 2, &quot;\\u00be&quot;: 18, &quot;\\u00bc&quot;: 55, &quot;\\u00eb&quot;: 2, &quot;\\u0097&quot;: 1, &quot;\\u215b&quot;: 2, &quot;\\u2027&quot;: 3, &quot;\\u00e4&quot;: 8, &quot;\\u001a&quot;: 1, &quot;\\u00f8&quot;: 1, &quot;\\ufffd&quot;: 4, &quot;\\u02da&quot;: 3, &quot;\\u00bf&quot;: 191, &quot;\\u2153&quot;: 1, &quot;|&quot;: 2, &quot;\\u00e5&quot;: 1, &quot;\\u00a4&quot;: 1, &quot;\\u201f&quot;: 1, &quot;\\u00a7&quot;: 3, &quot;\\ufb02&quot;: 1, &quot;\\u0300&quot;: 1, &quot;\\u01a1&quot;: 1, &quot;\\u00a0&quot;: 1, &quot;\\u01b0&quot;: 1, &quot;\\u0103&quot;: 1, &quot;\\u00bb&quot;: 2, &quot;`&quot;: 3, &quot;\\u0092&quot;: 2, &quot;\\u215e&quot;: 1, &quot;\\u202d&quot;: 1, &quot;\\u00b4&quot;: 1, &quot;\\u2012&quot;: 1, &quot;\\u00c9&quot;: 15, &quot;\\u00da&quot;: 5, &quot;\\u20ac&quot;: 1, &quot;\\\\&quot;: 5, &quot;~&quot;: 1, &quot;\\u0095&quot;: 1, &quot;\\u00c2&quot;: 1}&#x27;,

 &#x27;index_docs&#x27;: &#x27;{&quot;1&quot;: 100212, &quot;165&quot;: 1, &quot;25&quot;: 97316, &quot;41&quot;: 61954, &quot;5&quot;: 100205, &quot;8&quot;: 100207, &quot;11&quot;: 100194, &quot;14&quot;: 100161, &quot;33&quot;: 89250, &quot;31&quot;: 100212, &quot;58&quot;: 40870, &quot;26&quot;: 99320, &quot;18&quot;: 99975, &quot;6&quot;: 100198, &quot;19&quot;: 99702, &quot;4&quot;: 100202, &quot;21&quot;: 100163, &quot;66&quot;: 24377, &quot;37&quot;: 79135, &quot;51&quot;: 100212, &quot;7&quot;: 100207, &quot;40&quot;: 65486, &quot;22&quot;: 98331, &quot;13&quot;: 100190, &quot;34&quot;: 82453, &quot;3&quot;: 100205, &quot;29&quot;: 96743, &quot;27&quot;: 97848, &quot;35&quot;: 83328, &quot;9&quot;: 100204, &quot;16&quot;: 100212, &quot;62&quot;: 35206, &quot;53&quot;: 100212, &quot;20&quot;: 98524, &quot;32&quot;: 100212, &quot;10&quot;: 100206, &quot;2&quot;: 100212, &quot;28&quot;: 96387, &quot;43&quot;: 67614, &quot;15&quot;: 100046, &quot;64&quot;: 31908, &quot;52&quot;: 100212, &quot;17&quot;: 99988, &quot;23&quot;: 99227, &quot;42&quot;: 67627, &quot;44&quot;: 60900, &quot;12&quot;: 100161, &quot;24&quot;: 100212, &quot;39&quot;: 79364, &quot;50&quot;: 54040, &quot;71&quot;: 14114, &quot;60&quot;: 37000, &quot;63&quot;: 32101, &quot;38&quot;: 72133, &quot;54&quot;: 46675, &quot;30&quot;: 89051, &quot;47&quot;: 47438, &quot;48&quot;: 57940, &quot;45&quot;: 64278, &quot;55&quot;: 48332, &quot;36&quot;: 74711, &quot;49&quot;: 53758, &quot;76&quot;: 5819, &quot;73&quot;: 9981, &quot;59&quot;: 38981, &quot;57&quot;: 36538, &quot;56&quot;: 33863, &quot;61&quot;: 35355, &quot;68&quot;: 18120, &quot;46&quot;: 42430, &quot;84&quot;: 2184, &quot;65&quot;: 18214, &quot;69&quot;: 12161, &quot;70&quot;: 14834, &quot;92&quot;: 321, &quot;79&quot;: 2617, &quot;67&quot;: 20103, &quot;80&quot;: 5148, &quot;90&quot;: 448, &quot;81&quot;: 3142, &quot;75&quot;: 8225, &quot;83&quot;: 2428, &quot;72&quot;: 10621, &quot;74&quot;: 9710, &quot;86&quot;: 749, &quot;105&quot;: 32, &quot;100&quot;: 48, &quot;87&quot;: 717, &quot;82&quot;: 1780, &quot;103&quot;: 91, &quot;115&quot;: 26, &quot;116&quot;: 25, &quot;78&quot;: 2462, &quot;106&quot;: 33, &quot;108&quot;: 27, &quot;98&quot;: 81, &quot;97&quot;: 139, &quot;88&quot;: 423, &quot;93&quot;: 207, &quot;101&quot;: 64, &quot;77&quot;: 3062, &quot;137&quot;: 3, &quot;141&quot;: 4, &quot;107&quot;: 49, &quot;133&quot;: 7, &quot;131&quot;: 8, &quot;85&quot;: 491, &quot;136&quot;: 7, &quot;119&quot;: 22, &quot;102&quot;: 45, &quot;89&quot;: 335, &quot;109&quot;: 38, &quot;95&quot;: 95, &quot;126&quot;: 9, &quot;91&quot;: 122, &quot;96&quot;: 120, &quot;120&quot;: 8, &quot;111&quot;: 25, &quot;112&quot;: 24, &quot;123&quot;: 10, &quot;114&quot;: 29, &quot;110&quot;: 40, &quot;99&quot;: 52, &quot;144&quot;: 2, &quot;129&quot;: 6, &quot;138&quot;: 4, &quot;134&quot;: 4, &quot;117&quot;: 19, &quot;145&quot;: 2, &quot;125&quot;: 9, &quot;146&quot;: 2, &quot;121&quot;: 11, &quot;118&quot;: 4, &quot;132&quot;: 4, &quot;130&quot;: 6, &quot;135&quot;: 5, &quot;153&quot;: 2, &quot;122&quot;: 18, &quot;104&quot;: 55, &quot;154&quot;: 2, &quot;155&quot;: 1, &quot;149&quot;: 2, &quot;147&quot;: 3, &quot;127&quot;: 8, &quot;156&quot;: 1, &quot;157&quot;: 1, &quot;124&quot;: 4, &quot;139&quot;: 3, &quot;94&quot;: 191, &quot;158&quot;: 1, &quot;159&quot;: 2, &quot;150&quot;: 1, &quot;166&quot;: 1, &quot;167&quot;: 1, &quot;142&quot;: 3, &quot;151&quot;: 1, &quot;171&quot;: 1, &quot;169&quot;: 1, &quot;168&quot;: 1, &quot;160&quot;: 1, &quot;170&quot;: 1, &quot;140&quot;: 2, &quot;152&quot;: 3, &quot;161&quot;: 2, &quot;172&quot;: 1, &quot;148&quot;: 1, &quot;162&quot;: 1, &quot;163&quot;: 1, &quot;113&quot;: 15, &quot;128&quot;: 5, &quot;173&quot;: 1, &quot;143&quot;: 5, &quot;174&quot;: 1, &quot;175&quot;: 1, &quot;164&quot;: 1}&#x27;,

 &#x27;index_word&#x27;: &#x27;{&quot;1&quot;: &quot; &quot;, &quot;2&quot;: &quot;e&quot;, &quot;3&quot;: &quot;a&quot;, &quot;4&quot;: &quot;t&quot;, &quot;5&quot;: &quot;o&quot;, &quot;6&quot;: &quot;n&quot;, &quot;7&quot;: &quot;i&quot;, &quot;8&quot;: &quot;r&quot;, &quot;9&quot;: &quot;s&quot;, &quot;10&quot;: &quot;l&quot;, &quot;11&quot;: &quot;d&quot;, &quot;12&quot;: &quot;h&quot;, &quot;13&quot;: &quot;c&quot;, &quot;14&quot;: &quot;u&quot;, &quot;15&quot;: &quot;p&quot;, &quot;16&quot;: &quot;\\n&quot;, &quot;17&quot;: &quot;m&quot;, &quot;18&quot;: &quot;g&quot;, &quot;19&quot;: &quot;b&quot;, &quot;20&quot;: &quot;,&quot;, &quot;21&quot;: &quot;.&quot;, &quot;22&quot;: &quot;f&quot;, &quot;23&quot;: &quot;w&quot;, &quot;24&quot;: &quot;\\u2022&quot;, &quot;25&quot;: &quot;k&quot;, &quot;26&quot;: &quot;1&quot;, &quot;27&quot;: &quot;v&quot;, &quot;28&quot;: &quot;y&quot;, &quot;29&quot;: &quot;2&quot;, &quot;30&quot;: &quot;/&quot;, &quot;31&quot;: &quot;\\u25aa&quot;, &quot;32&quot;: &quot;\\ufe0e&quot;, &quot;33&quot;: &quot;S&quot;, &quot;34&quot;: &quot;4&quot;, &quot;35&quot;: &quot;C&quot;, &quot;36&quot;: &quot;-&quot;, &quot;37&quot;: &quot;3&quot;, &quot;38&quot;: &quot;x&quot;, &quot;39&quot;: &quot;P&quot;, &quot;40&quot;: &quot;5&quot;, &quot;41&quot;: &quot;0&quot;, &quot;42&quot;: &quot;(&quot;, &quot;43&quot;: &quot;)&quot;, &quot;44&quot;: &quot;A&quot;, &quot;45&quot;: &quot;B&quot;, &quot;46&quot;: &quot;z&quot;, &quot;47&quot;: &quot;j&quot;, &quot;48&quot;: &quot;F&quot;, &quot;49&quot;: &quot;T&quot;, &quot;50&quot;: &quot;R&quot;, &quot;51&quot;: &quot;\\ud83d\\udcd7&quot;, &quot;52&quot;: &quot;\\ud83e\\udd55&quot;, &quot;53&quot;: &quot;\\ud83d\\udcdd&quot;, &quot;54&quot;: &quot;I&quot;, &quot;55&quot;: &quot;M&quot;, &quot;56&quot;: &quot;;&quot;, &quot;57&quot;: &quot;q&quot;, &quot;58&quot;: &quot;D&quot;, &quot;59&quot;: &quot;W&quot;, &quot;60&quot;: &quot;8&quot;, &quot;61&quot;: &quot;G&quot;, &quot;62&quot;: &quot;6&quot;, &quot;63&quot;: &quot;L&quot;, &quot;64&quot;: &quot;H&quot;, &quot;65&quot;: &quot;:&quot;, &quot;66&quot;: &quot;7&quot;, &quot;67&quot;: &quot;O&quot;, &quot;68&quot;: &quot;\&#x27;&quot;, &quot;69&quot;: &quot;E&quot;, &quot;70&quot;: &quot;K&quot;, &quot;71&quot;: &quot;9&quot;, &quot;72&quot;: &quot;U&quot;, &quot;73&quot;: &quot;N&quot;, &quot;74&quot;: &quot;V&quot;, &quot;75&quot;: &quot;J&quot;, &quot;76&quot;: &quot;\\u00ae&quot;, &quot;77&quot;: &quot;\\u00b0&quot;, &quot;78&quot;: &quot;\\u00e9&quot;, &quot;79&quot;: &quot;\\&quot;&quot;, &quot;80&quot;: &quot;Y&quot;, &quot;81&quot;: &quot;Q&quot;, &quot;82&quot;: &quot;*&quot;, &quot;83&quot;: &quot;!&quot;, &quot;84&quot;: &quot;Z&quot;, &quot;85&quot;: &quot;\\u2013&quot;, &quot;86&quot;: &quot;&amp;&quot;, &quot;87&quot;: &quot;%&quot;, &quot;88&quot;: &quot;\\u00f1&quot;, &quot;89&quot;: &quot;\\u00e8&quot;, &quot;90&quot;: &quot;\\u2122&quot;, &quot;91&quot;: &quot;\\u00ee&quot;, &quot;92&quot;: &quot;X&quot;, &quot;93&quot;: &quot;?&quot;, &quot;94&quot;: &quot;\\u00bf&quot;, &quot;95&quot;: &quot;\\u2014&quot;, &quot;96&quot;: &quot;\\u00e7&quot;, &quot;97&quot;: &quot;#&quot;, &quot;98&quot;: &quot;\\u00bd&quot;, &quot;99&quot;: &quot;\\u00ed&quot;, &quot;100&quot;: &quot;=&quot;, &quot;101&quot;: &quot;\\u2019&quot;, &quot;102&quot;: &quot;\\u00e2&quot;, &quot;103&quot;: &quot;\\u00a9&quot;, &quot;104&quot;: &quot;\\u00bc&quot;, &quot;105&quot;: &quot;+&quot;, &quot;106&quot;: &quot;&gt;&quot;, &quot;107&quot;: &quot;$&quot;, &quot;108&quot;: &quot;&lt;&quot;, &quot;109&quot;: &quot;\\u00e1&quot;, &quot;110&quot;: &quot;\\u00f3&quot;, &quot;111&quot;: &quot;\\u00fa&quot;, &quot;112&quot;: &quot;\\u00ef&quot;, &quot;113&quot;: &quot;\\u00c9&quot;, &quot;114&quot;: &quot;\\u00fb&quot;, &quot;115&quot;: &quot;]&quot;, &quot;116&quot;: &quot;[&quot;, &quot;117&quot;: &quot;\\u00fc&quot;, &quot;118&quot;: &quot;\\u00ea&quot;, &quot;119&quot;: &quot;\\u00e0&quot;, &quot;120&quot;: &quot;_&quot;, &quot;121&quot;: &quot;\\u00ad&quot;, &quot;122&quot;: &quot;\\u00be&quot;, &quot;123&quot;: &quot;\\u201a&quot;, &quot;124&quot;: &quot;\\ufffd&quot;, &quot;125&quot;: &quot;\\u00ba&quot;, &quot;126&quot;: &quot;\\u2044&quot;, &quot;127&quot;: &quot;\\u00e4&quot;, &quot;128&quot;: &quot;\\u00da&quot;, &quot;129&quot;: &quot;\\u00f9&quot;, &quot;130&quot;: &quot;\\u0301&quot;, &quot;131&quot;: &quot;}&quot;, &quot;132&quot;: &quot;\\u00f6&quot;, &quot;133&quot;: &quot;{&quot;, &quot;134&quot;: &quot;\\u00ec&quot;, &quot;135&quot;: &quot;\\u00f4&quot;, &quot;136&quot;: &quot;\\u0096&quot;, &quot;137&quot;: &quot;\\u201d&quot;, &quot;138&quot;: &quot;\\u00d7&quot;, &quot;139&quot;: &quot;\\u02da&quot;, &quot;140&quot;: &quot;\\u00bb&quot;, &quot;141&quot;: &quot;@&quot;, &quot;142&quot;: &quot;\\u00a7&quot;, &quot;143&quot;: &quot;\\\\&quot;, &quot;144&quot;: &quot;\\u25ca&quot;, &quot;145&quot;: &quot;\\u2031&quot;, &quot;146&quot;: &quot;\\u201c&quot;, &quot;147&quot;: &quot;\\u2027&quot;, &quot;148&quot;: &quot;\\u202d&quot;, &quot;149&quot;: &quot;\\u215b&quot;, &quot;150&quot;: &quot;\\u00e5&quot;, &quot;151&quot;: &quot;\\ufb02&quot;, &quot;152&quot;: &quot;`&quot;, &quot;153&quot;: &quot;\\u00c1&quot;, &quot;154&quot;: &quot;\\u00eb&quot;, &quot;155&quot;: &quot;\\u0097&quot;, &quot;156&quot;: &quot;\\u001a&quot;, &quot;157&quot;: &quot;\\u00f8&quot;, &quot;158&quot;: &quot;\\u2153&quot;, &quot;159&quot;: &quot;|&quot;, &quot;160&quot;: &quot;\\u01b0&quot;, &quot;161&quot;: &quot;\\u0092&quot;, &quot;162&quot;: &quot;\\u00b4&quot;, &quot;163&quot;: &quot;\\u2012&quot;, &quot;164&quot;: &quot;\\u00c2&quot;, &quot;165&quot;: &quot;\\u2423&quot;, &quot;166&quot;: &quot;\\u00a4&quot;, &quot;167&quot;: &quot;\\u201f&quot;, &quot;168&quot;: &quot;\\u00a0&quot;, &quot;169&quot;: &quot;\\u01a1&quot;, &quot;170&quot;: &quot;\\u0103&quot;, &quot;171&quot;: &quot;\\u0300&quot;, &quot;172&quot;: &quot;\\u215e&quot;, &quot;173&quot;: &quot;\\u20ac&quot;, &quot;174&quot;: &quot;~&quot;, &quot;175&quot;: &quot;\\u0095&quot;}&#x27;,

 &#x27;word_index&#x27;: &#x27;{&quot; &quot;: 1, &quot;e&quot;: 2, &quot;a&quot;: 3, &quot;t&quot;: 4, &quot;o&quot;: 5, &quot;n&quot;: 6, &quot;i&quot;: 7, &quot;r&quot;: 8, &quot;s&quot;: 9, &quot;l&quot;: 10, &quot;d&quot;: 11, &quot;h&quot;: 12, &quot;c&quot;: 13, &quot;u&quot;: 14, &quot;p&quot;: 15, &quot;\\n&quot;: 16, &quot;m&quot;: 17, &quot;g&quot;: 18, &quot;b&quot;: 19, &quot;,&quot;: 20, &quot;.&quot;: 21, &quot;f&quot;: 22, &quot;w&quot;: 23, &quot;\\u2022&quot;: 24, &quot;k&quot;: 25, &quot;1&quot;: 26, &quot;v&quot;: 27, &quot;y&quot;: 28, &quot;2&quot;: 29, &quot;/&quot;: 30, &quot;\\u25aa&quot;: 31, &quot;\\ufe0e&quot;: 32, &quot;S&quot;: 33, &quot;4&quot;: 34, &quot;C&quot;: 35, &quot;-&quot;: 36, &quot;3&quot;: 37, &quot;x&quot;: 38, &quot;P&quot;: 39, &quot;5&quot;: 40, &quot;0&quot;: 41, &quot;(&quot;: 42, &quot;)&quot;: 43, &quot;A&quot;: 44, &quot;B&quot;: 45, &quot;z&quot;: 46, &quot;j&quot;: 47, &quot;F&quot;: 48, &quot;T&quot;: 49, &quot;R&quot;: 50, &quot;\\ud83d\\udcd7&quot;: 51, &quot;\\ud83e\\udd55&quot;: 52, &quot;\\ud83d\\udcdd&quot;: 53, &quot;I&quot;: 54, &quot;M&quot;: 55, &quot;;&quot;: 56, &quot;q&quot;: 57, &quot;D&quot;: 58, &quot;W&quot;: 59, &quot;8&quot;: 60, &quot;G&quot;: 61, &quot;6&quot;: 62, &quot;L&quot;: 63, &quot;H&quot;: 64, &quot;:&quot;: 65, &quot;7&quot;: 66, &quot;O&quot;: 67, &quot;\&#x27;&quot;: 68, &quot;E&quot;: 69, &quot;K&quot;: 70, &quot;9&quot;: 71, &quot;U&quot;: 72, &quot;N&quot;: 73, &quot;V&quot;: 74, &quot;J&quot;: 75, &quot;\\u00ae&quot;: 76, &quot;\\u00b0&quot;: 77, &quot;\\u00e9&quot;: 78, &quot;\\&quot;&quot;: 79, &quot;Y&quot;: 80, &quot;Q&quot;: 81, &quot;*&quot;: 82, &quot;!&quot;: 83, &quot;Z&quot;: 84, &quot;\\u2013&quot;: 85, &quot;&amp;&quot;: 86, &quot;%&quot;: 87, &quot;\\u00f1&quot;: 88, &quot;\\u00e8&quot;: 89, &quot;\\u2122&quot;: 90, &quot;\\u00ee&quot;: 91, &quot;X&quot;: 92, &quot;?&quot;: 93, &quot;\\u00bf&quot;: 94, &quot;\\u2014&quot;: 95, &quot;\\u00e7&quot;: 96, &quot;#&quot;: 97, &quot;\\u00bd&quot;: 98, &quot;\\u00ed&quot;: 99, &quot;=&quot;: 100, &quot;\\u2019&quot;: 101, &quot;\\u00e2&quot;: 102, &quot;\\u00a9&quot;: 103, &quot;\\u00bc&quot;: 104, &quot;+&quot;: 105, &quot;&gt;&quot;: 106, &quot;$&quot;: 107, &quot;&lt;&quot;: 108, &quot;\\u00e1&quot;: 109, &quot;\\u00f3&quot;: 110, &quot;\\u00fa&quot;: 111, &quot;\\u00ef&quot;: 112, &quot;\\u00c9&quot;: 113, &quot;\\u00fb&quot;: 114, &quot;]&quot;: 115, &quot;[&quot;: 116, &quot;\\u00fc&quot;: 117, &quot;\\u00ea&quot;: 118, &quot;\\u00e0&quot;: 119, &quot;_&quot;: 120, &quot;\\u00ad&quot;: 121, &quot;\\u00be&quot;: 122, &quot;\\u201a&quot;: 123, &quot;\\ufffd&quot;: 124, &quot;\\u00ba&quot;: 125, &quot;\\u2044&quot;: 126, &quot;\\u00e4&quot;: 127, &quot;\\u00da&quot;: 128, &quot;\\u00f9&quot;: 129, &quot;\\u0301&quot;: 130, &quot;}&quot;: 131, &quot;\\u00f6&quot;: 132, &quot;{&quot;: 133, &quot;\\u00ec&quot;: 134, &quot;\\u00f4&quot;: 135, &quot;\\u0096&quot;: 136, &quot;\\u201d&quot;: 137, &quot;\\u00d7&quot;: 138, &quot;\\u02da&quot;: 139, &quot;\\u00bb&quot;: 140, &quot;@&quot;: 141, &quot;\\u00a7&quot;: 142, &quot;\\\\&quot;: 143, &quot;\\u25ca&quot;: 144, &quot;\\u2031&quot;: 145, &quot;\\u201c&quot;: 146, &quot;\\u2027&quot;: 147, &quot;\\u202d&quot;: 148, &quot;\\u215b&quot;: 149, &quot;\\u00e5&quot;: 150, &quot;\\ufb02&quot;: 151, &quot;`&quot;: 152, &quot;\\u00c1&quot;: 153, &quot;\\u00eb&quot;: 154, &quot;\\u0097&quot;: 155, &quot;\\u001a&quot;: 156, &quot;\\u00f8&quot;: 157, &quot;\\u2153&quot;: 158, &quot;|&quot;: 159, &quot;\\u01b0&quot;: 160, &quot;\\u0092&quot;: 161, &quot;\\u00b4&quot;: 162, &quot;\\u2012&quot;: 163, &quot;\\u00c2&quot;: 164, &quot;\\u2423&quot;: 165, &quot;\\u00a4&quot;: 166, &quot;\\u201f&quot;: 167, &quot;\\u00a0&quot;: 168, &quot;\\u01a1&quot;: 169, &quot;\\u0103&quot;: 170, &quot;\\u0300&quot;: 171, &quot;\\u215e&quot;: 172, &quot;\\u20ac&quot;: 173, &quot;~&quot;: 174, &quot;\\u0095&quot;: 175}&#x27;}</code></pre></div></blockquote><p>To get a full size of a vocabulary we need to add <code class="language-text">+1</code> to the number of already registered characters because <a href="https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/text/Tokenizer">index <code class="language-text">0</code> is a reserved index that won&#x27;t be assigned to any word</a>.</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python">VOCABULARY_SIZE <span class="token operator">=</span> <span class="token builtin">len</span><span class="token punctuation">(</span>tokenizer<span class="token punctuation">.</span>word_counts<span class="token punctuation">)</span> <span class="token operator">+</span> <span class="token number">1</span>

<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;VOCABULARY_SIZE: &#x27;</span><span class="token punctuation">,</span> VOCABULARY_SIZE<span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">VOCABULARY_SIZE:  176</code></pre></div></blockquote><p>Let&#x27;s play around with tokenizer dictionaries to see how we may convert characters to indices and vice-versa:</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python"><span class="token keyword">print</span><span class="token punctuation">(</span>tokenizer<span class="token punctuation">.</span>index_word<span class="token punctuation">[</span><span class="token number">5</span><span class="token punctuation">]</span><span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>tokenizer<span class="token punctuation">.</span>index_word<span class="token punctuation">[</span><span class="token number">20</span><span class="token punctuation">]</span><span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">o
,</code></pre></div></blockquote><p>Let&#x27;s try to convert character to index:</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python">tokenizer<span class="token punctuation">.</span>word_index<span class="token punctuation">[</span><span class="token string">&#x27;r&#x27;</span><span class="token punctuation">]</span></code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">8</code></pre></div></blockquote><p>To illustrate what kind of characters form all the recipes in our dataset we may print all of them as an array:</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python">array_vocabulary <span class="token operator">=</span> tokenizer<span class="token punctuation">.</span>sequences_to_texts<span class="token punctuation">(</span><span class="token punctuation">[</span><span class="token punctuation">[</span>word_index<span class="token punctuation">]</span> <span class="token keyword">for</span> word_index <span class="token keyword">in</span> <span class="token builtin">range</span><span class="token punctuation">(</span>VOCABULARY_SIZE<span class="token punctuation">)</span><span class="token punctuation">]</span><span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token punctuation">[</span>char <span class="token keyword">for</span> char <span class="token keyword">in</span> array_vocabulary<span class="token punctuation">]</span><span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">[&#x27;&#x27;, &#x27; &#x27;, &#x27;e&#x27;, &#x27;a&#x27;, &#x27;t&#x27;, &#x27;o&#x27;, &#x27;n&#x27;, &#x27;i&#x27;, &#x27;r&#x27;, &#x27;s&#x27;, &#x27;l&#x27;, &#x27;d&#x27;, &#x27;h&#x27;, &#x27;c&#x27;, &#x27;u&#x27;, &#x27;p&#x27;, &#x27;\n&#x27;, &#x27;m&#x27;, &#x27;g&#x27;, &#x27;b&#x27;, &#x27;,&#x27;, &#x27;.&#x27;, &#x27;f&#x27;, &#x27;w&#x27;, &#x27;•&#x27;, &#x27;k&#x27;, &#x27;1&#x27;, &#x27;v&#x27;, &#x27;y&#x27;, &#x27;2&#x27;, &#x27;/&#x27;, &#x27;▪&#x27;, &#x27;︎&#x27;, &#x27;S&#x27;, &#x27;4&#x27;, &#x27;C&#x27;, &#x27;-&#x27;, &#x27;3&#x27;, &#x27;x&#x27;, &#x27;P&#x27;, &#x27;5&#x27;, &#x27;0&#x27;, &#x27;(&#x27;, &#x27;)&#x27;, &#x27;A&#x27;, &#x27;B&#x27;, &#x27;z&#x27;, &#x27;j&#x27;, &#x27;F&#x27;, &#x27;T&#x27;, &#x27;R&#x27;, &#x27;📗&#x27;, &#x27;🥕&#x27;, &#x27;📝&#x27;, &#x27;I&#x27;, &#x27;M&#x27;, &#x27;;&#x27;, &#x27;q&#x27;, &#x27;D&#x27;, &#x27;W&#x27;, &#x27;8&#x27;, &#x27;G&#x27;, &#x27;6&#x27;, &#x27;L&#x27;, &#x27;H&#x27;, &#x27;:&#x27;, &#x27;7&#x27;, &#x27;O&#x27;, &quot;&#x27;&quot;, &#x27;E&#x27;, &#x27;K&#x27;, &#x27;9&#x27;, &#x27;U&#x27;, &#x27;N&#x27;, &#x27;V&#x27;, &#x27;J&#x27;, &#x27;®&#x27;, &#x27;°&#x27;, &#x27;é&#x27;, &#x27;&quot;&#x27;, &#x27;Y&#x27;, &#x27;Q&#x27;, &#x27;*&#x27;, &#x27;!&#x27;, &#x27;Z&#x27;, &#x27;–&#x27;, &#x27;&amp;&#x27;, &#x27;%&#x27;, &#x27;ñ&#x27;, &#x27;è&#x27;, &#x27;™&#x27;, &#x27;î&#x27;, &#x27;X&#x27;, &#x27;?&#x27;, &#x27;¿&#x27;, &#x27;—&#x27;, &#x27;ç&#x27;, &#x27;#&#x27;, &#x27;½&#x27;, &#x27;í&#x27;, &#x27;=&#x27;, &#x27;’&#x27;, &#x27;â&#x27;, &#x27;©&#x27;, &#x27;¼&#x27;, &#x27;+&#x27;, &#x27;&gt;&#x27;, &#x27;$&#x27;, &#x27;&lt;&#x27;, &#x27;á&#x27;, &#x27;ó&#x27;, &#x27;ú&#x27;, &#x27;ï&#x27;, &#x27;É&#x27;, &#x27;û&#x27;, &#x27;]&#x27;, &#x27;[&#x27;, &#x27;ü&#x27;, &#x27;ê&#x27;, &#x27;à&#x27;, &#x27;_&#x27;, &#x27;\xad&#x27;, &#x27;¾&#x27;, &#x27;‚&#x27;, &#x27;�&#x27;, &#x27;º&#x27;, &#x27;⁄&#x27;, &#x27;ä&#x27;, &#x27;Ú&#x27;, &#x27;ù&#x27;, &#x27;́&#x27;, &#x27;}&#x27;, &#x27;ö&#x27;, &#x27;{&#x27;, &#x27;ì&#x27;, &#x27;ô&#x27;, &#x27;\x96&#x27;, &#x27;”&#x27;, &#x27;×&#x27;, &#x27;˚&#x27;, &#x27;»&#x27;, &#x27;@&#x27;, &#x27;§&#x27;, &#x27;\\&#x27;, &#x27;◊&#x27;, &#x27;‱&#x27;, &#x27;“&#x27;, &#x27;‧&#x27;, &#x27;\u202d&#x27;, &#x27;⅛&#x27;, &#x27;å&#x27;, &#x27;ﬂ&#x27;, &#x27;`&#x27;, &#x27;Á&#x27;, &#x27;ë&#x27;, &#x27;\x97&#x27;, &#x27;\x1a&#x27;, &#x27;ø&#x27;, &#x27;⅓&#x27;, &#x27;|&#x27;, &#x27;ư&#x27;, &#x27;\x92&#x27;, &#x27;´&#x27;, &#x27;‒&#x27;, &#x27;Â&#x27;, &#x27;␣&#x27;, &#x27;¤&#x27;, &#x27;‟&#x27;, &#x27;\xa0&#x27;, &#x27;ơ&#x27;, &#x27;ă&#x27;, &#x27;̀&#x27;, &#x27;⅞&#x27;, &#x27;€&#x27;, &#x27;~&#x27;, &#x27;\x95&#x27;]</code></pre></div></blockquote><p>These are all the characters our RNN model will work with. It will try to learn how to assemble these characters into sequences that will look like recipes.</p><p>Let&#x27;s see how we may use <code class="language-text">tokenizer</code> functions to convert text to indices:</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python">tokenizer<span class="token punctuation">.</span>texts_to_sequences<span class="token punctuation">(</span><span class="token punctuation">[</span><span class="token string">&#x27;📗 yes&#x27;</span><span class="token punctuation">]</span><span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">[[51, 1, 28, 2, 9]]</code></pre></div></blockquote><h2 id="vectorizing-the-dataset" style="position:relative">Vectorizing the dataset<a href="#vectorizing-the-dataset" aria-label="vectorizing the dataset permalink" class="gatsby-remark-autolink-header-anchor after"><svg aria-hidden="true" focusable="false" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a></h2><p>Now, once we have a vocabulary (<code class="language-text">character --&gt; code</code> and <code class="language-text">code --&gt; character</code> relations) we may convert the set of recipes from text to numbers (RNN works with numbers as an input and not with the texts).</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python">dataset_vectorized <span class="token operator">=</span> tokenizer<span class="token punctuation">.</span>texts_to_sequences<span class="token punctuation">(</span>dataset_filtered<span class="token punctuation">)</span>

<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;Vectorized dataset size&#x27;</span><span class="token punctuation">,</span> <span class="token builtin">len</span><span class="token punctuation">(</span>dataset_vectorized<span class="token punctuation">)</span><span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">Vectorized dataset size 100212</code></pre></div></blockquote><p>This is how the beginning of the first vectorized recipe looks like:</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python"><span class="token keyword">print</span><span class="token punctuation">(</span>dataset_vectorized<span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span><span class="token punctuation">[</span><span class="token punctuation">:</span><span class="token number">10</span><span class="token punctuation">]</span><span class="token punctuation">,</span> <span class="token string">&#x27;...&#x27;</span><span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">[51, 1, 33, 10, 5, 23, 1, 35, 5, 5] ...</code></pre></div></blockquote><p>Let&#x27;s see how can we convert vectorized recipe back to text representation:</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python"><span class="token keyword">def</span> <span class="token function">recipe_sequence_to_string</span><span class="token punctuation">(</span>recipe_sequence<span class="token punctuation">)</span><span class="token punctuation">:</span>
    recipe_stringified <span class="token operator">=</span> tokenizer<span class="token punctuation">.</span>sequences_to_texts<span class="token punctuation">(</span><span class="token punctuation">[</span>recipe_sequence<span class="token punctuation">]</span><span class="token punctuation">)</span><span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span>
    <span class="token keyword">print</span><span class="token punctuation">(</span>recipe_stringified<span class="token punctuation">)</span>

recipe_sequence_to_string<span class="token punctuation">(</span>dataset_vectorized<span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span><span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">📗 Slow Cooker Chicken and Dumplings

🥕

• 4 skinless, boneless chicken breast halves
• 2 tablespoons butter
• 2 (10.75 ounce) cans condensed cream of chicken soup
• 1 onion, finely diced
• 2 (10 ounce) packages refrigerated biscuit dough, torn into pieces

📝

▪︎ Place the chicken, butter, soup, and onion in a slow cooker, and fill with enough water to cover.
▪︎ Cover, and cook for 5 to 6 hours on High. About 30 minutes before serving, place the torn biscuit dough in the slow cooker. Cook until the dough is no longer raw in the center.</code></pre></div></blockquote><h3 id="add-padding-to-sequences" style="position:relative">Add padding to sequences<a href="#add-padding-to-sequences" aria-label="add padding to sequences permalink" class="gatsby-remark-autolink-header-anchor after"><svg aria-hidden="true" focusable="false" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a></h3><p>We need all recipes to have the same length for training. To do that we&#x27;ll use <a href="https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/sequence/pad_sequences">tf.keras.preprocessing.sequence.pad_sequences</a> utility to add a stop word to the end of each recipe and to make them have the same length.</p><p>Let&#x27;s check the recipes lengths:</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python"><span class="token keyword">for</span> recipe_index<span class="token punctuation">,</span> recipe <span class="token keyword">in</span> <span class="token builtin">enumerate</span><span class="token punctuation">(</span>dataset_vectorized<span class="token punctuation">[</span><span class="token punctuation">:</span><span class="token number">10</span><span class="token punctuation">]</span><span class="token punctuation">)</span><span class="token punctuation">:</span>
    <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;Recipe #{} length: {}&#x27;</span><span class="token punctuation">.</span><span class="token builtin">format</span><span class="token punctuation">(</span>recipe_index <span class="token operator">+</span> <span class="token number">1</span><span class="token punctuation">,</span> <span class="token builtin">len</span><span class="token punctuation">(</span>recipe<span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">Recipe #1 length: 546
Recipe #2 length: 401
Recipe #3 length: 671
Recipe #4 length: 736
Recipe #5 length: 1518
Recipe #6 length: 740
Recipe #7 length: 839
Recipe #8 length: 667
Recipe #9 length: 1264
Recipe #10 length: 854</code></pre></div></blockquote><p>Let&#x27;s pad all recipes with a <code class="language-text">STOP_SIGN</code>:</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python">dataset_vectorized_padded_without_stops <span class="token operator">=</span> tf<span class="token punctuation">.</span>keras<span class="token punctuation">.</span>preprocessing<span class="token punctuation">.</span>sequence<span class="token punctuation">.</span>pad_sequences<span class="token punctuation">(</span>
    dataset_vectorized<span class="token punctuation">,</span>
    padding<span class="token operator">=</span><span class="token string">&#x27;post&#x27;</span><span class="token punctuation">,</span>
    truncating<span class="token operator">=</span><span class="token string">&#x27;post&#x27;</span><span class="token punctuation">,</span>
    <span class="token comment"># We use -1 here and +1 in the next step to make sure</span>
    <span class="token comment"># that all recipes will have at least 1 stops sign at the end,</span>
    <span class="token comment"># since each sequence will be shifted and truncated afterwards</span>
    <span class="token comment"># (to generate X and Y sequences).</span>
    maxlen<span class="token operator">=</span>MAX_RECIPE_LENGTH<span class="token operator">-</span><span class="token number">1</span><span class="token punctuation">,</span>
    value<span class="token operator">=</span>tokenizer<span class="token punctuation">.</span>texts_to_sequences<span class="token punctuation">(</span><span class="token punctuation">[</span>STOP_SIGN<span class="token punctuation">]</span><span class="token punctuation">)</span><span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span>
<span class="token punctuation">)</span>

dataset_vectorized_padded <span class="token operator">=</span> tf<span class="token punctuation">.</span>keras<span class="token punctuation">.</span>preprocessing<span class="token punctuation">.</span>sequence<span class="token punctuation">.</span>pad_sequences<span class="token punctuation">(</span>
    dataset_vectorized_padded_without_stops<span class="token punctuation">,</span>
    padding<span class="token operator">=</span><span class="token string">&#x27;post&#x27;</span><span class="token punctuation">,</span>
    truncating<span class="token operator">=</span><span class="token string">&#x27;post&#x27;</span><span class="token punctuation">,</span>
    maxlen<span class="token operator">=</span>MAX_RECIPE_LENGTH<span class="token operator">+</span><span class="token number">1</span><span class="token punctuation">,</span>
    value<span class="token operator">=</span>tokenizer<span class="token punctuation">.</span>texts_to_sequences<span class="token punctuation">(</span><span class="token punctuation">[</span>STOP_SIGN<span class="token punctuation">]</span><span class="token punctuation">)</span><span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span>
<span class="token punctuation">)</span>

<span class="token keyword">for</span> recipe_index<span class="token punctuation">,</span> recipe <span class="token keyword">in</span> <span class="token builtin">enumerate</span><span class="token punctuation">(</span>dataset_vectorized_padded<span class="token punctuation">[</span><span class="token punctuation">:</span><span class="token number">10</span><span class="token punctuation">]</span><span class="token punctuation">)</span><span class="token punctuation">:</span>
    <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;Recipe #{} length: {}&#x27;</span><span class="token punctuation">.</span><span class="token builtin">format</span><span class="token punctuation">(</span>recipe_index<span class="token punctuation">,</span> <span class="token builtin">len</span><span class="token punctuation">(</span>recipe<span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">Recipe #0 length: 2001
Recipe #1 length: 2001
Recipe #2 length: 2001
Recipe #3 length: 2001
Recipe #4 length: 2001
Recipe #5 length: 2001
Recipe #6 length: 2001
Recipe #7 length: 2001
Recipe #8 length: 2001
Recipe #9 length: 2001</code></pre></div></blockquote><p>After the padding all recipes in the dataset now have the same length and RNN will also be able to learn where each recipe stops (by observing the presence of a <code class="language-text">STOP_SIGN</code>).</p><p>Here is an example of how a first recipe looks like after the padding.</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python">recipe_sequence_to_string<span class="token punctuation">(</span>dataset_vectorized_padded<span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span><span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">📗 Slow Cooker Chicken and Dumplings

🥕

• 4 skinless, boneless chicken breast halves
• 2 tablespoons butter
• 2 (10.75 ounce) cans condensed cream of chicken soup
• 1 onion, finely diced
• 2 (10 ounce) packages refrigerated biscuit dough, torn into pieces

📝

▪︎ Place the chicken, butter, soup, and onion in a slow cooker, and fill with enough water to cover.
▪︎ Cover, and cook for 5 to 6 hours on High. About 30 minutes before serving, place the torn biscuit dough in the slow cooker. Cook until the dough is no longer raw in the center.
␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣</code></pre></div></blockquote><p>All recipes now end with one or many <code class="language-text">␣</code> signs. We expect our LSTM model to learn that whenever it sees the <code class="language-text">␣</code> stop-character it means that the recipe is ended. Once the network will learn this concept it will put stop-character at the end of every newly generated recipe.</p><h3 id="create-tensorflow-dataset" style="position:relative">Create TensorFlow dataset<a href="#create-tensorflow-dataset" aria-label="create tensorflow dataset permalink" class="gatsby-remark-autolink-header-anchor after"><svg aria-hidden="true" focusable="false" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a></h3><p>Up until now we were working with the dataset as with the NumPy array. It will be more convenient during the training process if we will convert a dataset NumPy array to a <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset">TensorFlow dataset</a>. It will give us an ability to use such helpers functions as <code class="language-text">batch()</code>, <code class="language-text">shuffle()</code>, <code class="language-text">repeat()</code>, <code class="language-text">prefecth()</code> etc.:</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python">dataset <span class="token operator">=</span> tf<span class="token punctuation">.</span>data<span class="token punctuation">.</span>Dataset<span class="token punctuation">.</span>from_tensor_slices<span class="token punctuation">(</span>dataset_vectorized_padded<span class="token punctuation">)</span>

<span class="token keyword">print</span><span class="token punctuation">(</span>dataset<span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">&lt;TensorSliceDataset shapes: (2001,), types: tf.int32&gt;</code></pre></div></blockquote><p>Let&#x27;s see what the first recipe in the dataset looks like by using a TensorFlow dataset API this time:</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python"><span class="token keyword">for</span> recipe <span class="token keyword">in</span> dataset<span class="token punctuation">.</span>take<span class="token punctuation">(</span><span class="token number">1</span><span class="token punctuation">)</span><span class="token punctuation">:</span>
    <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;Raw recipe:\n&#x27;</span><span class="token punctuation">,</span> recipe<span class="token punctuation">.</span>numpy<span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">,</span> <span class="token string">&#x27;\n\n\n&#x27;</span><span class="token punctuation">)</span>
    <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;Stringified recipe:\n&#x27;</span><span class="token punctuation">)</span>
    recipe_sequence_to_string<span class="token punctuation">(</span>recipe<span class="token punctuation">.</span>numpy<span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">Raw recipe:
 [ 51   1  33 ... 165 165 165]

Stringified recipe:

📗 Slow Cooker Chicken and Dumplings

🥕

• 4 skinless, boneless chicken breast halves
• 2 tablespoons butter
• 2 (10.75 ounce) cans condensed cream of chicken soup
• 1 onion, finely diced
• 2 (10 ounce) packages refrigerated biscuit dough, torn into pieces

📝

▪︎ Place the chicken, butter, soup, and onion in a slow cooker, and fill with enough water to cover.
▪︎ Cover, and cook for 5 to 6 hours on High. About 30 minutes before serving, place the torn biscuit dough in the slow cooker. Cook until the dough is no longer raw in the center.
␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣</code></pre></div></blockquote><h3 id="split-examples-on-input-and-target-texts" style="position:relative">Split examples on <code class="language-text">input</code> and <code class="language-text">target</code> texts<a href="#split-examples-on-input-and-target-texts" aria-label="split examples on input and target texts permalink" class="gatsby-remark-autolink-header-anchor after"><svg aria-hidden="true" focusable="false" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a></h3><p>For each sequence we need to duplicate and shift it to form the <code class="language-text">input</code> and <code class="language-text">target</code> texts. For example, say the <code class="language-text">sequence_length</code> is <code class="language-text">4</code> and our text is <code class="language-text">Hello</code>. The input sequence would be <code class="language-text">Hell</code>, and the target sequence <code class="language-text">ello</code>.</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python"><span class="token keyword">def</span> <span class="token function">split_input_target</span><span class="token punctuation">(</span>recipe<span class="token punctuation">)</span><span class="token punctuation">:</span>
    input_text <span class="token operator">=</span> recipe<span class="token punctuation">[</span><span class="token punctuation">:</span><span class="token operator">-</span><span class="token number">1</span><span class="token punctuation">]</span>
    target_text <span class="token operator">=</span> recipe<span class="token punctuation">[</span><span class="token number">1</span><span class="token punctuation">:</span><span class="token punctuation">]</span>

    <span class="token keyword">return</span> input_text<span class="token punctuation">,</span> target_text

dataset_targeted <span class="token operator">=</span> dataset<span class="token punctuation">.</span><span class="token builtin">map</span><span class="token punctuation">(</span>split_input_target<span class="token punctuation">)</span>

<span class="token keyword">print</span><span class="token punctuation">(</span>dataset_targeted<span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">&lt;MapDataset shapes: ((2000,), (2000,)), types: (tf.int32, tf.int32)&gt;</code></pre></div></blockquote><p>You may notice from the line above, that now each example in the dataset consists of two tuples: input and target. Let&#x27;s print an example:</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python"><span class="token keyword">for</span> input_example<span class="token punctuation">,</span> target_example <span class="token keyword">in</span> dataset_targeted<span class="token punctuation">.</span>take<span class="token punctuation">(</span><span class="token number">1</span><span class="token punctuation">)</span><span class="token punctuation">:</span>
    <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;Input sequence size:&#x27;</span><span class="token punctuation">,</span> <span class="token builtin">repr</span><span class="token punctuation">(</span><span class="token builtin">len</span><span class="token punctuation">(</span>input_example<span class="token punctuation">.</span>numpy<span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">)</span>
    <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;Target sequence size:&#x27;</span><span class="token punctuation">,</span> <span class="token builtin">repr</span><span class="token punctuation">(</span><span class="token builtin">len</span><span class="token punctuation">(</span>target_example<span class="token punctuation">.</span>numpy<span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">)</span>
    <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token punctuation">)</span>

    input_stringified <span class="token operator">=</span> tokenizer<span class="token punctuation">.</span>sequences_to_texts<span class="token punctuation">(</span><span class="token punctuation">[</span>input_example<span class="token punctuation">.</span>numpy<span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">[</span><span class="token punctuation">:</span><span class="token number">50</span><span class="token punctuation">]</span><span class="token punctuation">]</span><span class="token punctuation">)</span><span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span>
    target_stringified <span class="token operator">=</span> tokenizer<span class="token punctuation">.</span>sequences_to_texts<span class="token punctuation">(</span><span class="token punctuation">[</span>target_example<span class="token punctuation">.</span>numpy<span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">[</span><span class="token punctuation">:</span><span class="token number">50</span><span class="token punctuation">]</span><span class="token punctuation">]</span><span class="token punctuation">)</span><span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span>

    <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;Input:  &#x27;</span><span class="token punctuation">,</span> <span class="token builtin">repr</span><span class="token punctuation">(</span><span class="token string">&#x27;&#x27;</span><span class="token punctuation">.</span>join<span class="token punctuation">(</span>input_stringified<span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">)</span>
    <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;Target: &#x27;</span><span class="token punctuation">,</span> <span class="token builtin">repr</span><span class="token punctuation">(</span><span class="token string">&#x27;&#x27;</span><span class="token punctuation">.</span>join<span class="token punctuation">(</span>target_stringified<span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">Input sequence size: 2000
Target sequence size: 2000

Input:   &#x27;📗   S l o w   C o o k e r   C h i c k e n   a n d   D u m p l i n g s \n \n 🥕 \n \n •   4   s k i n l e&#x27;
Target:  &#x27;  S l o w   C o o k e r   C h i c k e n   a n d   D u m p l i n g s \n \n 🥕 \n \n •   4   s k i n l e s&#x27;</code></pre></div></blockquote><p>Each index of these vectors is processed as one time step by RNN. For the input at time step <code class="language-text">0</code>, the model receives the index for <code class="language-text">📗</code> and tries to predict the index for <code class="language-text"> </code> (a space character) as the next character. At the next time-step, it does the same thing, but the RNN considers the previous step context in addition to the current input character.</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python"><span class="token keyword">for</span> i<span class="token punctuation">,</span> <span class="token punctuation">(</span>input_idx<span class="token punctuation">,</span> target_idx<span class="token punctuation">)</span> <span class="token keyword">in</span> <span class="token builtin">enumerate</span><span class="token punctuation">(</span><span class="token builtin">zip</span><span class="token punctuation">(</span>input_example<span class="token punctuation">[</span><span class="token punctuation">:</span><span class="token number">10</span><span class="token punctuation">]</span><span class="token punctuation">,</span> target_example<span class="token punctuation">[</span><span class="token punctuation">:</span><span class="token number">10</span><span class="token punctuation">]</span><span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">:</span>
    <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;Step {:2d}&#x27;</span><span class="token punctuation">.</span><span class="token builtin">format</span><span class="token punctuation">(</span>i <span class="token operator">+</span> <span class="token number">1</span><span class="token punctuation">)</span><span class="token punctuation">)</span>
    <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;  input: {} ({:s})&#x27;</span><span class="token punctuation">.</span><span class="token builtin">format</span><span class="token punctuation">(</span>input_idx<span class="token punctuation">,</span> <span class="token builtin">repr</span><span class="token punctuation">(</span>tokenizer<span class="token punctuation">.</span>sequences_to_texts<span class="token punctuation">(</span><span class="token punctuation">[</span><span class="token punctuation">[</span>input_idx<span class="token punctuation">.</span>numpy<span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">]</span><span class="token punctuation">]</span><span class="token punctuation">)</span><span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span><span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">)</span>
    <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;  expected output: {} ({:s})&#x27;</span><span class="token punctuation">.</span><span class="token builtin">format</span><span class="token punctuation">(</span>target_idx<span class="token punctuation">,</span> <span class="token builtin">repr</span><span class="token punctuation">(</span>tokenizer<span class="token punctuation">.</span>sequences_to_texts<span class="token punctuation">(</span><span class="token punctuation">[</span><span class="token punctuation">[</span>target_idx<span class="token punctuation">.</span>numpy<span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">]</span><span class="token punctuation">]</span><span class="token punctuation">)</span><span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span><span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">Step  1
  input: 51 (&#x27;📗&#x27;)
  expected output: 1 (&#x27; &#x27;)
Step  2
  input: 1 (&#x27; &#x27;)
  expected output: 33 (&#x27;S&#x27;)
Step  3
  input: 33 (&#x27;S&#x27;)
  expected output: 10 (&#x27;l&#x27;)
Step  4
  input: 10 (&#x27;l&#x27;)
  expected output: 5 (&#x27;o&#x27;)
Step  5
  input: 5 (&#x27;o&#x27;)
  expected output: 23 (&#x27;w&#x27;)
Step  6
  input: 23 (&#x27;w&#x27;)
  expected output: 1 (&#x27; &#x27;)
Step  7
  input: 1 (&#x27; &#x27;)
  expected output: 35 (&#x27;C&#x27;)
Step  8
  input: 35 (&#x27;C&#x27;)
  expected output: 5 (&#x27;o&#x27;)
Step  9
  input: 5 (&#x27;o&#x27;)
  expected output: 5 (&#x27;o&#x27;)
Step 10
  input: 5 (&#x27;o&#x27;)
  expected output: 25 (&#x27;k&#x27;)</code></pre></div></blockquote><h3 id="split-up-the-dataset-into-batches" style="position:relative">Split up the dataset into batches<a href="#split-up-the-dataset-into-batches" aria-label="split up the dataset into batches permalink" class="gatsby-remark-autolink-header-anchor after"><svg aria-hidden="true" focusable="false" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a></h3><p>We have <code class="language-text">~100k</code> recipes in the dataset, and each recipe has two tuples of <code class="language-text">2000</code> characters.</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python"><span class="token keyword">print</span><span class="token punctuation">(</span>dataset_targeted<span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">&lt;MapDataset shapes: ((2000,), (2000,)), types: (tf.int32, tf.int32)&gt;</code></pre></div></blockquote><p>Let&#x27;s print constants values:</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python"><span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;TOTAL_RECIPES_NUM: &#x27;</span><span class="token punctuation">,</span> TOTAL_RECIPES_NUM<span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;MAX_RECIPE_LENGTH: &#x27;</span><span class="token punctuation">,</span> MAX_RECIPE_LENGTH<span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;VOCABULARY_SIZE: &#x27;</span><span class="token punctuation">,</span> VOCABULARY_SIZE<span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">TOTAL_RECIPES_NUM:  100212
MAX_RECIPE_LENGTH:  2000
VOCABULARY_SIZE:  176</code></pre></div></blockquote><p>If we will feed the complete dataset during the training process to the model and then will try to do a back-propagation for all examples at once we might run out of memory and each training epoch may take too long to execute. To avoid the situation like this we need to split our dataset into batches.</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python"><span class="token comment"># Batch size.</span>
BATCH_SIZE <span class="token operator">=</span> <span class="token number">64</span>

<span class="token comment"># Buffer size to shuffle the dataset (TF data is designed to work</span>
<span class="token comment"># with possibly infinite sequences, so it doesn&#x27;t attempt to shuffle</span>
<span class="token comment"># the entire sequence in memory. Instead, it maintains a buffer in</span>
<span class="token comment"># which it shuffles elements).</span>
SHUFFLE_BUFFER_SIZE <span class="token operator">=</span> <span class="token number">1000</span>

dataset_train <span class="token operator">=</span> dataset_targeted \
  <span class="token comment"># Shuffling examples first.</span>
  <span class="token punctuation">.</span>shuffle<span class="token punctuation">(</span>SHUFFLE_BUFFER_SIZE<span class="token punctuation">)</span> \
  <span class="token comment"># Splitting examples on batches.</span>
  <span class="token punctuation">.</span>batch<span class="token punctuation">(</span>BATCH_SIZE<span class="token punctuation">,</span> drop_remainder<span class="token operator">=</span><span class="token boolean">True</span><span class="token punctuation">)</span> \
  <span class="token comment"># Making a dataset to be repeatable (it will never ends).</span>
  <span class="token punctuation">.</span>repeat<span class="token punctuation">(</span><span class="token punctuation">)</span>

<span class="token keyword">print</span><span class="token punctuation">(</span>dataset_train<span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">&lt;RepeatDataset shapes: ((64, 2000), (64, 2000)), types: (tf.int32, tf.int32)&gt;</code></pre></div></blockquote><p>From the line above you may notice that our dataset now consists of the same two tuples of <code class="language-text">2000</code> characters but now they are grouped in the batches by <code class="language-text">64</code>.</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python"><span class="token keyword">for</span> input_text<span class="token punctuation">,</span> target_text <span class="token keyword">in</span> dataset_train<span class="token punctuation">.</span>take<span class="token punctuation">(</span><span class="token number">1</span><span class="token punctuation">)</span><span class="token punctuation">:</span>
    <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;1st batch: input_text:&#x27;</span><span class="token punctuation">,</span> input_text<span class="token punctuation">)</span>
    <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token punctuation">)</span>
    <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;1st batch: target_text:&#x27;</span><span class="token punctuation">,</span> target_text<span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">1st batch: input_text: tf.Tensor(
[[ 51   1  54 ... 165 165 165]
 [ 51   1  64 ... 165 165 165]
 [ 51   1  44 ... 165 165 165]
 ...
 [ 51   1  69 ... 165 165 165]
 [ 51   1  55 ... 165 165 165]
 [ 51   1  70 ... 165 165 165]], shape=(64, 2000), dtype=int32)

1st batch: target_text: tf.Tensor(
[[  1  54   4 ... 165 165 165]
 [  1  64   5 ... 165 165 165]
 [  1  44   6 ... 165 165 165]
 ...
 [  1  69   3 ... 165 165 165]
 [  1  55   3 ... 165 165 165]
 [  1  70   2 ... 165 165 165]], shape=(64, 2000), dtype=int32)</code></pre></div></blockquote><h2 id="build-the-model" style="position:relative">Build the model<a href="#build-the-model" aria-label="build the model permalink" class="gatsby-remark-autolink-header-anchor after"><svg aria-hidden="true" focusable="false" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a></h2><p>We will use <a href="https://www.tensorflow.org/api_docs/python/tf/keras/Sequential">tf.keras.Sequential</a> to define the model. For this experiment we will use the following layer types:</p><ul><li><a href="https://www.tensorflow.org/api_docs/python/tf/keras/layers/Embedding">tf.keras.layers.Embedding</a> - the input layer (a trainable lookup table that will map the numbers of each character to a vector with <code class="language-text">embedding_dim</code> dimensions),</li><li><a href="https://www.tensorflow.org/api_docs/python/tf/keras/layers/LSTM">tf.keras.layers.LSTM</a> - a type of RNN with size <code class="language-text">units=rnn_units</code> (you can also use a <a href="https://www.tensorflow.org/api_docs/python/tf/keras/layers/GRU">GRU</a> layer here),</li><li><a href="https://www.tensorflow.org/api_docs/python/tf/keras/layers/Dense">tf.keras.layers.Dense</a> - the output layer, with <code class="language-text">VOCABULARY_SIZE</code> outputs.</li></ul><h3 id="figuring-out-how-the-embedding-layer-works" style="position:relative">Figuring out how the Embedding Layer works<a href="#figuring-out-how-the-embedding-layer-works" aria-label="figuring out how the embedding layer works permalink" class="gatsby-remark-autolink-header-anchor after"><svg aria-hidden="true" focusable="false" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a></h3><p>Let&#x27;s do a quick detour and see how Embedding Layer works. It takes several char indices sequences (batch) as an input. It encodes every character of every sequence to a vector of <code class="language-text">tmp_embedding_size</code> length.</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python">tmp_vocab_size <span class="token operator">=</span> <span class="token number">10</span>
tmp_embedding_size <span class="token operator">=</span> <span class="token number">5</span>
tmp_input_length <span class="token operator">=</span> <span class="token number">8</span>
tmp_batch_size <span class="token operator">=</span> <span class="token number">2</span>

tmp_model <span class="token operator">=</span> tf<span class="token punctuation">.</span>keras<span class="token punctuation">.</span>models<span class="token punctuation">.</span>Sequential<span class="token punctuation">(</span><span class="token punctuation">)</span>
tmp_model<span class="token punctuation">.</span>add<span class="token punctuation">(</span>tf<span class="token punctuation">.</span>keras<span class="token punctuation">.</span>layers<span class="token punctuation">.</span>Embedding<span class="token punctuation">(</span>
  input_dim<span class="token operator">=</span>tmp_vocab_size<span class="token punctuation">,</span>
  output_dim<span class="token operator">=</span>tmp_embedding_size<span class="token punctuation">,</span>
  input_length<span class="token operator">=</span>tmp_input_length
<span class="token punctuation">)</span><span class="token punctuation">)</span>
<span class="token comment"># The model will take as input an integer matrix of size (batch, input_length).</span>
<span class="token comment"># The largest integer (i.e. word index) in the input should be no larger than 9 (tmp_vocab_size).</span>
<span class="token comment"># Now model.output_shape == (None, 10, 64), where None is the batch dimension.</span>
tmp_input_array <span class="token operator">=</span> np<span class="token punctuation">.</span>random<span class="token punctuation">.</span>randint<span class="token punctuation">(</span>
  low<span class="token operator">=</span><span class="token number">0</span><span class="token punctuation">,</span>
  high<span class="token operator">=</span>tmp_vocab_size<span class="token punctuation">,</span>
  size<span class="token operator">=</span><span class="token punctuation">(</span>tmp_batch_size<span class="token punctuation">,</span> tmp_input_length<span class="token punctuation">)</span>
<span class="token punctuation">)</span>
tmp_model<span class="token punctuation">.</span><span class="token builtin">compile</span><span class="token punctuation">(</span><span class="token string">&#x27;rmsprop&#x27;</span><span class="token punctuation">,</span> <span class="token string">&#x27;mse&#x27;</span><span class="token punctuation">)</span>
tmp_output_array <span class="token operator">=</span> tmp_model<span class="token punctuation">.</span>predict<span class="token punctuation">(</span>tmp_input_array<span class="token punctuation">)</span>

<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;tmp_input_array shape:&#x27;</span><span class="token punctuation">,</span> tmp_input_array<span class="token punctuation">.</span>shape<span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;tmp_input_array:&#x27;</span><span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>tmp_input_array<span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;tmp_output_array shape:&#x27;</span><span class="token punctuation">,</span> tmp_output_array<span class="token punctuation">.</span>shape<span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;tmp_output_array:&#x27;</span><span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>tmp_output_array<span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">tmp_input_array shape: (2, 8)
tmp_input_array:
[[2 4 7 5 1 6 9 7]
 [3 6 8 1 4 0 1 2]]

tmp_output_array shape: (2, 8, 5)
tmp_output_array:
[[[-0.02229502 -0.02800617 -0.0120693  -0.01681594 -0.00650246]
  [-0.03046973 -0.03920818  0.04956308  0.04417323 -0.00446874]
  [-0.0215276   0.01532575 -0.02229529  0.02834387  0.02725342]
  [ 0.04567988  0.0141306   0.00877035 -0.02601192  0.00380837]
  [ 0.02969306  0.02994296 -0.00233263  0.00716375 -0.00847433]
  [ 0.04598364 -0.00704358 -0.01386416  0.01195388 -0.00309662]
  [-0.00137572  0.01275543 -0.02348721 -0.04825885  0.00527108]
  [-0.0215276   0.01532575 -0.02229529  0.02834387  0.02725342]]

 [[ 0.01082945  0.03824175 -0.00450991 -0.02865709  0.02502238]
  [ 0.04598364 -0.00704358 -0.01386416  0.01195388 -0.00309662]
  [ 0.02275398  0.03806095 -0.03491788  0.04705564  0.00167596]
  [ 0.02969306  0.02994296 -0.00233263  0.00716375 -0.00847433]
  [-0.03046973 -0.03920818  0.04956308  0.04417323 -0.00446874]
  [-0.02909902  0.04426369  0.00150937  0.04579213  0.02559013]
  [ 0.02969306  0.02994296 -0.00233263  0.00716375 -0.00847433]
  [-0.02229502 -0.02800617 -0.0120693  -0.01681594 -0.00650246]]]</code></pre></div></blockquote><h3 id="lstm-model" style="position:relative">LSTM Model<a href="#lstm-model" aria-label="lstm model permalink" class="gatsby-remark-autolink-header-anchor after"><svg aria-hidden="true" focusable="false" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a></h3><p>Let&#x27;s assemble the model.</p><p>ℹ️ You may check <a href="https://www.tensorflow.org/tutorials/text/text_generation">Text generation with an RNN</a> notebook from TensorFlow documentation for more details on model components.</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python"><span class="token keyword">def</span> <span class="token function">build_model</span><span class="token punctuation">(</span>vocab_size<span class="token punctuation">,</span> embedding_dim<span class="token punctuation">,</span> rnn_units<span class="token punctuation">,</span> batch_size<span class="token punctuation">)</span><span class="token punctuation">:</span>
    model <span class="token operator">=</span> tf<span class="token punctuation">.</span>keras<span class="token punctuation">.</span>models<span class="token punctuation">.</span>Sequential<span class="token punctuation">(</span><span class="token punctuation">)</span>

    model<span class="token punctuation">.</span>add<span class="token punctuation">(</span>tf<span class="token punctuation">.</span>keras<span class="token punctuation">.</span>layers<span class="token punctuation">.</span>Embedding<span class="token punctuation">(</span>
        input_dim<span class="token operator">=</span>vocab_size<span class="token punctuation">,</span>
        output_dim<span class="token operator">=</span>embedding_dim<span class="token punctuation">,</span>
        batch_input_shape<span class="token operator">=</span><span class="token punctuation">[</span>batch_size<span class="token punctuation">,</span> <span class="token boolean">None</span><span class="token punctuation">]</span>
    <span class="token punctuation">)</span><span class="token punctuation">)</span>

    model<span class="token punctuation">.</span>add<span class="token punctuation">(</span>tf<span class="token punctuation">.</span>keras<span class="token punctuation">.</span>layers<span class="token punctuation">.</span>LSTM<span class="token punctuation">(</span>
        units<span class="token operator">=</span>rnn_units<span class="token punctuation">,</span>
        return_sequences<span class="token operator">=</span><span class="token boolean">True</span><span class="token punctuation">,</span>
        stateful<span class="token operator">=</span><span class="token boolean">True</span><span class="token punctuation">,</span>
        recurrent_initializer<span class="token operator">=</span>tf<span class="token punctuation">.</span>keras<span class="token punctuation">.</span>initializers<span class="token punctuation">.</span>GlorotNormal<span class="token punctuation">(</span><span class="token punctuation">)</span>
    <span class="token punctuation">)</span><span class="token punctuation">)</span>

    model<span class="token punctuation">.</span>add<span class="token punctuation">(</span>tf<span class="token punctuation">.</span>keras<span class="token punctuation">.</span>layers<span class="token punctuation">.</span>Dense<span class="token punctuation">(</span>vocab_size<span class="token punctuation">)</span><span class="token punctuation">)</span>

    <span class="token keyword">return</span> model

model <span class="token operator">=</span> build_model<span class="token punctuation">(</span>
  vocab_size<span class="token operator">=</span>VOCABULARY_SIZE<span class="token punctuation">,</span>
  embedding_dim<span class="token operator">=</span><span class="token number">256</span><span class="token punctuation">,</span>
  rnn_units<span class="token operator">=</span><span class="token number">1024</span><span class="token punctuation">,</span>
  batch_size<span class="token operator">=</span>BATCH_SIZE
<span class="token punctuation">)</span>

model<span class="token punctuation">.</span>summary<span class="token punctuation">(</span><span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">Model: &quot;sequential_13&quot;
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
embedding_13 (Embedding)     (64, None, 256)           45056
_________________________________________________________________
lstm_9 (LSTM)                (64, None, 1024)          5246976
_________________________________________________________________
dense_8 (Dense)              (64, None, 176)           180400
=================================================================
Total params: 5,472,432
Trainable params: 5,472,432
Non-trainable params: 0
_________________________________________________________________</code></pre></div></blockquote><p>Let&#x27;s visualize the model:</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python">tf<span class="token punctuation">.</span>keras<span class="token punctuation">.</span>utils<span class="token punctuation">.</span>plot_model<span class="token punctuation">(</span>
    model<span class="token punctuation">,</span>
    show_shapes<span class="token operator">=</span><span class="token boolean">True</span><span class="token punctuation">,</span>
    show_layer_names<span class="token operator">=</span><span class="token boolean">True</span><span class="token punctuation">,</span>
    to_file<span class="token operator">=</span><span class="token string">&#x27;model.png&#x27;</span>
<span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><p><span class="gatsby-resp-image-wrapper" style="position:relative;display:block;margin-left:auto;margin-right:auto;max-width:439px">
      <span class="gatsby-resp-image-background-image" style="padding-bottom:89.99999999999999%;position:relative;bottom:0;left:0;background-image:url(&#x27;data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAASCAYAAABb0P4QAAAACXBIWXMAAAsTAAALEwEAmpwYAAACVUlEQVQ4y2WUiQ7iMAxE+/+fhQRCHOIoUKBAuVtKy424QrJ61hqxS6UhzmHHnnHw0jQ11+vVnM9ngdqXy0Wgto5JkpgwDI3v+ybLMhMEgel2uyaKIjOZTIy3Xq/dYDBwvu+7VqvlVquV2263LgxDwXQ6de122w2HQzeZTFySJHK2UqmIzV6z2ZQ94nhxHNs8z+12u7VpmtokSWSODbA5w3qWZXa5XFrf922lUrGLxcIGQWDb7baNosiOx2PrvV4vpzDGCNTW8Xw+u9vtJvbj8XDv99tZaz9+zNXXY8Ih8Hw+/7Hv97vjo3RKIghgjwsY8dezEnC327n9fu+Ox6PDPp1OYud5/lnfbDaC2Wzm6vW6BM+yTNbgcLlcylng8TMej4VY0Ov15DBOnU5HQJDFYuGiKHKlUknIZ844Go2kAkTDTwIeDgfhCWhml8tFsmWN/TRNxalWq0mg+Xwu2aF0v98X5SVDOKB+5Q6Sv7mEF6VCxVDBlD8VhdFzfz/dBN8qEwBKKBFbyf+/E/TzOMRH/YC0KZtSaHrQ7Xal1DiOpdHhmgsYoYEKtAMkQwx4ggu4QVnECYJAghCwWq2Kyqwx50UhFqLxsjTOJ0MygycVAlGu16uIgVOj0ZDLtCe5iEsRiGR+Aio/KgY2gFscuOD7RSi/KtRPwO+XouKoymRFdjjrC/m+mPlPQLoeLlQUSsVmhLdCoSCCwDN8IgoclstlsX8Coipkaza0ChyhMmOxWBRFeWaoi0Ccp6FJ5icgWcEVQGUFh3le/Ffq/yFzLkJpniNtowH/AIKzSWgPUid0AAAAAElFTkSuQmCC&#x27;);background-size:cover;display:block"></span>
  <img class="gatsby-resp-image-image" alt="Model architecture" title="Model architecture" src="/static/69dd454b38a2cb834ff2399587d6cd92/e3b18/4.png" srcSet="/static/69dd454b38a2cb834ff2399587d6cd92/63868/4.png 250w,/static/69dd454b38a2cb834ff2399587d6cd92/e3b18/4.png 439w" sizes="(max-width: 439px) 100vw, 439px" style="width:100%;height:100%;margin:0;vertical-align:middle;position:absolute;top:0;left:0" loading="lazy"/>
    </span></p><p>For each character the model looks up the embedding, runs the LSTM one time-step with the embedding as input, and applies the dense layer to generate logits predicting the log-likelihood of the next character:</p><p><span class="gatsby-resp-image-wrapper" style="position:relative;display:block;margin-left:auto;margin-right:auto;max-width:842px">
      <span class="gatsby-resp-image-background-image" style="padding-bottom:86.80000000000001%;position:relative;bottom:0;left:0;background-image:url(&#x27;data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAARCAYAAADdRIy+AAAACXBIWXMAAAsTAAALEwEAmpwYAAAEjUlEQVQ4y22Ue0xbVRzHf0Dby2vhMQJsZki0TlwEeWyIpCIsczhxvDZxsFEYjEfYgPKUd6EDy7OlUEpLKW0H5dGW0pa+eG6Xh4QBBnCAZIz4ik7nEl38w7gl19wCMTh/yS/n5J6Tz/n9zvd7LmAYZoFhGDxYHOJ9NSd7srmk2BiVs1O/WRleXp7pe7xxXyGBvSBuLinUS+idxztr6nVqfMTbAHDU2hpxB4CDtAAMew44UKfk+rDrC9JlorpIfmu5h2ag4TMRt4SGGnjBAGAJAFaG4Y5ANjM/rb/ni4uZaYmItLsZPqCcgUOBw/DcXagVo8P5P61PVqyvmMqqHkxXbk/Lac8ezjN0ODDlCgXZXagdmxjI+f7hHGPDJMs5tWIqBxk31fJ/gVsmGqWr7lKRmk9NWpYnv67uTPio8mZoypYhyxevDrEAwqaJFs6tjiocE6dQl/rjbXOvBcBLsfvskUXEJ2FE7ZxO1C7lLQ6OyacUE4o2+YTS2CkTLIzO6wZ8T58iVjVU2KhR7ShHzJ1X3VWjtZzacAA48oYX2Z1IJLriczNQNCSApe0vwYBqaGwRky/T9DSrTEO5gzopi9XNFBtQDf2EmyeSkplkOzqlYrQI67hyY1+bQMzz/hv7w9wdABBcXF1I5jlH1EITK4X0i+lnfViqalldb0kevlAuzKa2KKslZzP8XoVYsFxG14BaE+nPVjH0pfyb0fg1dA12FMo0EvqQUeYeHRe513J9e20/p7tlKq8xI4avb/6uXV3fh8vPUjL4XQb2nyVdWWEAQML3MnoLLwl0rL9YCkYZeIJNs6B+vInH3CiqysOdYO1+3N0OetVi0KNqwPIwy5MuZK8QMsUNrzD2/ahjQe+c9lPky5Ho0vcAvAHiIZNAAhKZbE92drrwr6AAYIsgiBMA2MAP325b4R+n5dqByUENNqsyPrnTyqfNjZh+nBxQY/cUOt2BsWdVRnRyUP1iST/9c3JcwrsA4OzhceI4gUB4BQDcAMABtu/eN58iauOdTKdeT6woKD7Pbmxyqfm8/NyNxCSqkNPxFn5fDo6OBAGrLeBGYnLq7dLKiIrSMqRfLAVfH5/DtpmTakHSxIXWW1WUSmr21cbM0ihefm0QM604suZ63tX23Jow/MDn6COLutTC4OIrGZE1ybQPOTl0p9bsKmCmFx8GXg69gA+WuhbJb6p6IaZp6sGGmV07epYUUzK7MG2T+EV4YIh1WdItB2Nrr3l9jCPDeumcT9UN3dBecNvqEDD4GhXA0dZipKjhtYTAsPNVUdQzirIWT0ZsSkBcQEi4qrCebIkgxIA3vUmS7Gr/WH9KDP1yathUk8T+nF/Qyy/lQKlu3b1QwfBYkViPptI7pV5S42y6cGSiXGyYifhlT0l8T4xgeLxMYpzNpHwchYtwhGRtbfdfoPn3NWSc52tnVp+Ozq5tsYUDaVp0dW10du2pcmIR96UViYQgyvFFrRZd/VU/9/VWSla+N24TR+ej9oeAWYUVsPO7uYJjAOACAE77abdvBVciiWSDW2Q/HQEAtwnR7CUS6VDL/wAfPhMVs1IauwAAAABJRU5ErkJggg==&#x27;);background-size:cover;display:block"></span>
  <img class="gatsby-resp-image-image" alt="Model architecture" title="Model architecture" src="/static/c47fb3afb041ecd74a969c20df71db1c/99072/5.png" srcSet="/static/c47fb3afb041ecd74a969c20df71db1c/63868/5.png 250w,/static/c47fb3afb041ecd74a969c20df71db1c/0b533/5.png 500w,/static/c47fb3afb041ecd74a969c20df71db1c/99072/5.png 842w" sizes="(max-width: 842px) 100vw, 842px" style="width:100%;height:100%;margin:0;vertical-align:middle;position:absolute;top:0;left:0" loading="lazy"/>
    </span></p><blockquote><p>Image source: <a href="https://www.tensorflow.org/tutorials/text/text_generation">Text generation with an RNN</a> notebook.</p></blockquote><p>The picture above illustrates GRU network, but you may easily replace GRU with LSTM.</p><h2 id="trying-the-model-before-training" style="position:relative">Trying the model before training<a href="#trying-the-model-before-training" aria-label="trying the model before training permalink" class="gatsby-remark-autolink-header-anchor after"><svg aria-hidden="true" focusable="false" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a></h2><p>Let&#x27;s play around with un-trained model to see its interface (what input do we need and what output will we have) and let&#x27;s see what model predicts before the training:</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python"><span class="token keyword">for</span> input_example_batch<span class="token punctuation">,</span> target_example_batch <span class="token keyword">in</span> dataset_train<span class="token punctuation">.</span>take<span class="token punctuation">(</span><span class="token number">1</span><span class="token punctuation">)</span><span class="token punctuation">:</span>
    example_batch_predictions <span class="token operator">=</span> model<span class="token punctuation">(</span>input_example_batch<span class="token punctuation">)</span>
    <span class="token keyword">print</span><span class="token punctuation">(</span>example_batch_predictions<span class="token punctuation">.</span>shape<span class="token punctuation">,</span> <span class="token string">&quot;# (batch_size, sequence_length, vocab_size)&quot;</span><span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">(64, 2000, 176) # (batch_size, sequence_length, vocab_size)</code></pre></div></blockquote><p>To get actual predictions from the model we need to sample from the output distribution, to get actual character indices. This distribution is defined by the logits over the character vocabulary.</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python"><span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;Prediction for the 1st letter of the batch 1st sequense:&#x27;</span><span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>example_batch_predictions<span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">]</span><span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">Prediction for the 1st letter of the batch 1st sequense:
tf.Tensor(
[-9.0643829e-03 -1.9503604e-03  9.3381782e-04  3.7442446e-03
 -2.0541784e-03 -7.4054599e-03 -7.1884273e-03  2.6014952e-03
  4.8721582e-03  3.0045470e-04  2.6016519e-04 -4.1374690e-03
  5.3856964e-03  2.6284808e-03 -5.6002503e-03  2.6019611e-03
 -1.9491187e-03 -3.1097094e-04  6.3465843e-03  1.4640498e-03
  2.4560774e-03 -3.1256995e-03  1.4104056e-03  2.5478401e-04
  5.4266443e-03 -4.1188141e-03  3.6904984e-03 -5.8337618e-03
  3.6372752e-03 -3.1899021e-05  3.2178329e-03  1.5033322e-04
  5.2770867e-04 -8.1920059e-04 -2.2364906e-03 -2.3271297e-03
  4.4109682e-03  4.2381673e-04  1.0532180e-03 -1.4208974e-03
 -3.2446394e-03 -4.5869066e-03  4.3250201e-04 -4.3490473e-03
  3.7889536e-03 -9.2122913e-04  7.8936084e-04 -9.7079907e-04
  1.7070504e-03 -2.5260956e-03  6.7904620e-03  1.5470090e-03
 -9.4337866e-04 -1.5072266e-03  6.8939931e-04 -1.0795534e-03
 -3.1912089e-03  2.3665284e-03  1.7737487e-03 -2.3504677e-03
 -6.8649277e-04  9.6421910e-04 -4.1204207e-03 -3.8750230e-03
  1.9077851e-03  4.7145790e-05 -2.9846188e-03  5.8050319e-03
 -5.6210475e-04 -2.5910907e-04  5.2890396e-03 -5.8653783e-03
 -6.0040038e-06  2.3905798e-03 -2.9405006e-03  2.0132761e-03
 -3.5594390e-03  4.0282350e-04  4.7719614e-03 -2.4438011e-03
 -1.1028582e-03  2.0007135e-03 -1.6961874e-03 -4.2196750e-03
 -3.5689408e-03 -4.1934610e-03 -8.5307617e-04  1.5773368e-04
 -1.4612130e-03  9.5826073e-04  4.0543079e-04 -2.3562380e-04
 -1.5394683e-03  3.6650903e-03  3.5997448e-03  2.2390878e-03
 -6.8982318e-04  1.4068574e-03 -2.0531749e-03 -1.5443334e-03
 -1.8235333e-03 -3.2099178e-03  1.6660831e-03  1.2230751e-03
  3.8084832e-03  6.9559496e-03  5.7684043e-03  3.1751506e-03
  7.4234616e-04  1.1971325e-04 -2.7798198e-03  2.1485630e-03
  4.0362971e-03  6.4410735e-05  1.7432809e-03  3.2334479e-03
 -6.1469898e-03 -2.2205685e-03 -1.0864032e-03 -2.0876178e-07
  2.3065242e-03 -1.5816523e-03 -2.1492387e-03 -4.4033155e-03
  1.1003019e-03 -9.7132073e-04 -6.3941808e-04  3.0277157e-03
  2.9096641e-03 -2.4778468e-03 -2.9532036e-03  7.7463314e-04
  2.7473709e-03 -7.6333171e-04 -8.1811845e-03 -1.3959130e-03
  3.2840301e-03  6.0461317e-03 -1.3022404e-04 -9.4000692e-04
 -2.0096730e-04  3.3895797e-03  2.9710699e-03  1.9046264e-03
  2.5092331e-03 -2.0799250e-04 -2.2211851e-04 -3.4621451e-05
  1.9962704e-03 -2.3159904e-03  2.9832027e-03  3.3852295e-03
  3.4411502e-04 -1.9019389e-03 -3.6734296e-04 -1.4232489e-03
  2.6938838e-03 -2.8015859e-03 -5.7366290e-03  8.0239226e-04
 -6.2909431e-04  1.1508183e-03 -1.5899434e-04 -5.9326587e-04
 -4.1618512e-04  5.2454891e-03  1.2823739e-03 -1.7550631e-03
 -3.0120560e-03 -3.8433261e-03 -9.6873334e-04  1.9963509e-03
  1.8154597e-03  4.7434499e-03  1.7146189e-03  1.1544267e-03], shape=(176,), dtype=float32)</code></pre></div></blockquote><p>For each input character the <code class="language-text">example_batch_predictions</code> array contains a vector of probabilities of what the next character might be. If probability at position <code class="language-text">15</code> in that vector is, lets say, <code class="language-text">0.3</code> and the probability at position <code class="language-text">25</code> is <code class="language-text">1.1</code> it means that we should better pick the character with the index <code class="language-text">25</code> as next following character.</p><p>Since we want our network to generate different recipes (even for the same input), we can&#x27;t just pick the maximum probability value. In this case we will end up with the same recipe being predicted by the network over and over again. What we will do instead is drawing <strong>samples</strong> from predictions (like the one printed above) by using <a href="https://www.tensorflow.org/api_docs/python/tf/random/categorical">tf.random.categorical()</a> function. It will bring some fuzziness to the network. For example, let&#x27;s say we have character <code class="language-text">H</code> as an input, then, by sampling from categorical distribution, our network may predict not only the word <code class="language-text">He</code>, but also words <code class="language-text">Hello</code>, and <code class="language-text">Hi</code> etc.</p><h3 id="understanding-how-tfrandomcategorical-works" style="position:relative">Understanding how <code class="language-text">tf.random.categorical</code> works<a href="#understanding-how-tfrandomcategorical-works" aria-label="understanding how tfrandomcategorical works permalink" class="gatsby-remark-autolink-header-anchor after"><svg aria-hidden="true" focusable="false" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a></h3><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python"><span class="token comment"># logits is 2-D Tensor with shape [batch_size, num_classes].</span>
<span class="token comment"># Each slice [i, :] represents the unnormalized log-probabilities for all classes.</span>
<span class="token comment"># In the example below we say that the probability for class &quot;0&quot;</span>
<span class="token comment"># (element with index 0) is low but the probability for class &quot;2&quot; is much higher.</span>
tmp_logits <span class="token operator">=</span> <span class="token punctuation">[</span>
  <span class="token punctuation">[</span><span class="token operator">-</span><span class="token number">0.95</span><span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">0.95</span><span class="token punctuation">]</span><span class="token punctuation">,</span>
<span class="token punctuation">]</span><span class="token punctuation">;</span>

<span class="token comment"># Let&#x27;s generate 5 samples. Each sample is a class index. Class probabilities</span>
<span class="token comment"># are being taken into account (we expect to see more samples of class &quot;2&quot;).</span>
tmp_samples <span class="token operator">=</span> tf<span class="token punctuation">.</span>random<span class="token punctuation">.</span>categorical<span class="token punctuation">(</span>
    logits<span class="token operator">=</span>tmp_logits<span class="token punctuation">,</span>
    num_samples<span class="token operator">=</span><span class="token number">5</span>
<span class="token punctuation">)</span>

<span class="token keyword">print</span><span class="token punctuation">(</span>tmp_samples<span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">tf.Tensor([[2 1 2 2 1]], shape=(1, 5), dtype=int64)</code></pre></div></blockquote><h3 id="sampling-from-lstm-predictions" style="position:relative">Sampling from LSTM predictions<a href="#sampling-from-lstm-predictions" aria-label="sampling from lstm predictions permalink" class="gatsby-remark-autolink-header-anchor after"><svg aria-hidden="true" focusable="false" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a></h3><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python">sampled_indices <span class="token operator">=</span> tf<span class="token punctuation">.</span>random<span class="token punctuation">.</span>categorical<span class="token punctuation">(</span>
    logits<span class="token operator">=</span>example_batch_predictions<span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span><span class="token punctuation">,</span>
    num_samples<span class="token operator">=</span><span class="token number">1</span>
<span class="token punctuation">)</span>

sampled_indices <span class="token operator">=</span> tf<span class="token punctuation">.</span>squeeze<span class="token punctuation">(</span>
    <span class="token builtin">input</span><span class="token operator">=</span>sampled_indices<span class="token punctuation">,</span>
    axis<span class="token operator">=</span><span class="token operator">-</span><span class="token number">1</span>
<span class="token punctuation">)</span><span class="token punctuation">.</span>numpy<span class="token punctuation">(</span><span class="token punctuation">)</span>

sampled_indices<span class="token punctuation">.</span>shape</code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">(2000,)</code></pre></div></blockquote><p>Let&#x27;s see some sampled predictions for the first <code class="language-text">100</code> chars of the recipe:</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python">sampled_indices<span class="token punctuation">[</span><span class="token punctuation">:</span><span class="token number">100</span><span class="token punctuation">]</span></code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">array([ 64,  21,  91, 126, 170,  42, 146,  54, 125, 164,  60, 171,   9,
        87, 129,  28, 146, 103,  41, 101, 147,   3, 134, 171,   8, 170,
       105,   5,  44, 173,   5, 105,  17, 138, 165,  32,  88,  96, 145,
        83,  33,  65, 172, 162,   8,  29, 147,  58,  81, 153, 150,  56,
       156,  38, 144, 134,  13,  40,  17,  50,  27,  35,  39, 112,  63,
       139, 151, 133,  68,  29,  91,   2,  70, 112, 135,  31,  26, 156,
       118,  71,  49, 104,  75,  27, 164,  41, 117, 124,  18, 137,  59,
       160, 158, 119, 173,  50,  78,  45, 121, 118])</code></pre></div></blockquote><p>We may see now what our untrained model actually predicts:</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python"><span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;Input:\n&#x27;</span><span class="token punctuation">,</span> <span class="token builtin">repr</span><span class="token punctuation">(</span><span class="token string">&#x27;&#x27;</span><span class="token punctuation">.</span>join<span class="token punctuation">(</span>tokenizer<span class="token punctuation">.</span>sequences_to_texts<span class="token punctuation">(</span><span class="token punctuation">[</span>input_example_batch<span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span><span class="token punctuation">.</span>numpy<span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">[</span><span class="token punctuation">:</span><span class="token number">50</span><span class="token punctuation">]</span><span class="token punctuation">]</span><span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;Next char prediction:\n&#x27;</span><span class="token punctuation">,</span> <span class="token builtin">repr</span><span class="token punctuation">(</span><span class="token string">&#x27;&#x27;</span><span class="token punctuation">.</span>join<span class="token punctuation">(</span>tokenizer<span class="token punctuation">.</span>sequences_to_texts<span class="token punctuation">(</span><span class="token punctuation">[</span>sampled_indices<span class="token punctuation">[</span><span class="token punctuation">:</span><span class="token number">50</span><span class="token punctuation">]</span><span class="token punctuation">]</span><span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">Input:
 &#x27;📗   R e s t a u r a n t - S t y l e   C o l e s l a w   I \n \n 🥕 \n \n •   1   ( 1 6   o u n c e )   p&#x27;

Next char prediction:
 &#x27;H . î ⁄ ă ( “ I º Â 8 ̀ s % ù y “ © 0 ’ ‧ a ì ̀ r ă + o A € o + m × ␣ ︎ ñ ç ‱ ! S : ⅞ ´ r 2 ‧ D Q Á&#x27;</code></pre></div></blockquote><p>As you may see, the model suggests some meaningless predictions, but this is because it wasn&#x27;t trained yet.</p><h2 id="training-the-model" style="position:relative">Training the model<a href="#training-the-model" aria-label="training the model permalink" class="gatsby-remark-autolink-header-anchor after"><svg aria-hidden="true" focusable="false" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a></h2><p>We want to train our model to generate recipes as similar to the real ones as possible. We will use all data from dataset for training. There is not need to extract test or validation sub-sets in this case.</p><h3 id="attach-an-optimizer-and-a-loss-function" style="position:relative">Attach an optimizer, and a loss function<a href="#attach-an-optimizer-and-a-loss-function" aria-label="attach an optimizer and a loss function permalink" class="gatsby-remark-autolink-header-anchor after"><svg aria-hidden="true" focusable="false" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a></h3><p>We&#x27;re going to use <a href="https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/Adam">tf.keras.optimizers.Adam</a> optimizer with <a href="https://www.tensorflow.org/api_docs/python/tf/keras/losses/sparse_categorical_crossentropy">tf.keras.losses.sparse_categorical_crossentropy()</a> loss function to train the model:</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python"><span class="token comment"># An objective function.</span>
<span class="token comment"># The function is any callable with the signature scalar_loss = fn(y_true, y_pred).</span>
<span class="token keyword">def</span> <span class="token function">loss</span><span class="token punctuation">(</span>labels<span class="token punctuation">,</span> logits<span class="token punctuation">)</span><span class="token punctuation">:</span>
    entropy <span class="token operator">=</span> tf<span class="token punctuation">.</span>keras<span class="token punctuation">.</span>losses<span class="token punctuation">.</span>sparse_categorical_crossentropy<span class="token punctuation">(</span>
      y_true<span class="token operator">=</span>labels<span class="token punctuation">,</span>
      y_pred<span class="token operator">=</span>logits<span class="token punctuation">,</span>
      from_logits<span class="token operator">=</span><span class="token boolean">True</span>
    <span class="token punctuation">)</span>

    <span class="token keyword">return</span> entropy

example_batch_loss <span class="token operator">=</span> loss<span class="token punctuation">(</span>target_example_batch<span class="token punctuation">,</span> example_batch_predictions<span class="token punctuation">)</span>

<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&quot;Prediction shape: &quot;</span><span class="token punctuation">,</span> example_batch_predictions<span class="token punctuation">.</span>shape<span class="token punctuation">,</span> <span class="token string">&quot; # (batch_size, sequence_length, vocab_size)&quot;</span><span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&quot;scalar_loss.shape:      &quot;</span><span class="token punctuation">,</span> example_batch_loss<span class="token punctuation">.</span>shape<span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&quot;scalar_loss:      &quot;</span><span class="token punctuation">,</span> example_batch_loss<span class="token punctuation">.</span>numpy<span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">.</span>mean<span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">Prediction shape:    (64, 2000, 176)  # (batch_size, sequence_length, vocab_size)
scalar_loss.shape:   (64, 2000)
scalar_loss:         5.1618285</code></pre></div></blockquote><p>Let&#x27;s finally compile the model:</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python">adam_optimizer <span class="token operator">=</span> tf<span class="token punctuation">.</span>keras<span class="token punctuation">.</span>optimizers<span class="token punctuation">.</span>Adam<span class="token punctuation">(</span>learning_rate<span class="token operator">=</span><span class="token number">0.001</span><span class="token punctuation">)</span>

model<span class="token punctuation">.</span><span class="token builtin">compile</span><span class="token punctuation">(</span>
    optimizer<span class="token operator">=</span>adam_optimizer<span class="token punctuation">,</span>
    loss<span class="token operator">=</span>loss
<span class="token punctuation">)</span></code></pre></div><h3 id="configuring-callbacks" style="position:relative">Configuring callbacks<a href="#configuring-callbacks" aria-label="configuring callbacks permalink" class="gatsby-remark-autolink-header-anchor after"><svg aria-hidden="true" focusable="false" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a></h3><h4 id="early-stopping-callback" style="position:relative">Early stopping callback<a href="#early-stopping-callback" aria-label="early stopping callback permalink" class="gatsby-remark-autolink-header-anchor after"><svg aria-hidden="true" focusable="false" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a></h4><p>For model training process we may configure a <a href="https://www.tensorflow.org/api_docs/python/tf/keras/callbacks/EarlyStopping">tf.keras.callbacks.EarlyStopping</a> callback. It will stop the training automatically in case if model is not improving for several epochs anymore:</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python">early_stopping_callback <span class="token operator">=</span> tf<span class="token punctuation">.</span>keras<span class="token punctuation">.</span>callbacks<span class="token punctuation">.</span>EarlyStopping<span class="token punctuation">(</span>
    patience<span class="token operator">=</span><span class="token number">5</span><span class="token punctuation">,</span>
    monitor<span class="token operator">=</span><span class="token string">&#x27;loss&#x27;</span><span class="token punctuation">,</span>
    restore_best_weights<span class="token operator">=</span><span class="token boolean">True</span><span class="token punctuation">,</span>
    verbose<span class="token operator">=</span><span class="token number">1</span>
<span class="token punctuation">)</span></code></pre></div><h4 id="model-checkpoints-callback" style="position:relative">Model checkpoints callback<a href="#model-checkpoints-callback" aria-label="model checkpoints callback permalink" class="gatsby-remark-autolink-header-anchor after"><svg aria-hidden="true" focusable="false" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a></h4><p>Let&#x27;s also configure a <a href="https://www.tensorflow.org/api_docs/python/tf/keras/callbacks/ModelCheckpoint">tf.keras.callbacks.ModelCheckpoint</a> checkpoint that will allow us to periodically save trained weights to the file so that we could restore the model from weights afterwards.</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python"><span class="token comment"># Create a checkpoints directory.</span>
checkpoint_dir <span class="token operator">=</span> <span class="token string">&#x27;tmp/checkpoints&#x27;</span>
os<span class="token punctuation">.</span>makedirs<span class="token punctuation">(</span>checkpoint_dir<span class="token punctuation">,</span> exist_ok<span class="token operator">=</span><span class="token boolean">True</span><span class="token punctuation">)</span>

checkpoint_prefix <span class="token operator">=</span> os<span class="token punctuation">.</span>path<span class="token punctuation">.</span>join<span class="token punctuation">(</span>checkpoint_dir<span class="token punctuation">,</span> <span class="token string">&#x27;ckpt_{epoch}&#x27;</span><span class="token punctuation">)</span>
checkpoint_callback<span class="token operator">=</span>tf<span class="token punctuation">.</span>keras<span class="token punctuation">.</span>callbacks<span class="token punctuation">.</span>ModelCheckpoint<span class="token punctuation">(</span>
    filepath<span class="token operator">=</span>checkpoint_prefix<span class="token punctuation">,</span>
    save_weights_only<span class="token operator">=</span><span class="token boolean">True</span>
<span class="token punctuation">)</span></code></pre></div><h3 id="execute-the-training" style="position:relative">Execute the training<a href="#execute-the-training" aria-label="execute the training permalink" class="gatsby-remark-autolink-header-anchor after"><svg aria-hidden="true" focusable="false" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a></h3><p>Let&#x27;s train our model for <code class="language-text">500</code> epochs with <code class="language-text">1500</code> steps per each epoch. For each epoch step the batch of <code class="language-text">64</code> recipes will be fetched and gradient descent will be executed for those <code class="language-text">64</code> recipes of length <code class="language-text">2000</code> step by step.</p><p>If you&#x27;re experimenting with training parameters it might make sense to reduce the number of epochs to, let&#x27;s say <code class="language-text">20</code> along with the number of steps per epoch and then see how the model performs under that conditions. If the model improves its performance you may add more data (steps and epochs) to the training process. It might save you some time while you adjust parameters.</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python">EPOCHS <span class="token operator">=</span> <span class="token number">500</span>
INITIAL_EPOCH <span class="token operator">=</span> <span class="token number">1</span>
STEPS_PER_EPOCH <span class="token operator">=</span> <span class="token number">1500</span>

<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;EPOCHS:          &#x27;</span><span class="token punctuation">,</span> EPOCHS<span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;INITIAL_EPOCH:   &#x27;</span><span class="token punctuation">,</span> INITIAL_EPOCH<span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;STEPS_PER_EPOCH: &#x27;</span><span class="token punctuation">,</span> STEPS_PER_EPOCH<span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">EPOCHS:           500
INITIAL_EPOCH:    1
STEPS_PER_EPOCH:  1500</code></pre></div></blockquote><p>Let&#x27;s launch the training:</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python">history <span class="token operator">=</span> model<span class="token punctuation">.</span>fit<span class="token punctuation">(</span>
    x<span class="token operator">=</span>dataset_train<span class="token punctuation">,</span>
    epochs<span class="token operator">=</span>EPOCHS<span class="token punctuation">,</span>
    steps_per_epoch<span class="token operator">=</span>STEPS_PER_EPOCH<span class="token punctuation">,</span>
    initial_epoch<span class="token operator">=</span>INITIAL_EPOCH<span class="token punctuation">,</span>
    callbacks<span class="token operator">=</span><span class="token punctuation">[</span>
        checkpoint_callback<span class="token punctuation">,</span>
        early_stopping_callback
    <span class="token punctuation">]</span>
<span class="token punctuation">)</span>

<span class="token comment"># Saving the trained model to file (to be able to re-use it later).</span>
model_name <span class="token operator">=</span> <span class="token string">&#x27;recipe_generation_rnn_raw.h5&#x27;</span>
model<span class="token punctuation">.</span>save<span class="token punctuation">(</span>model_name<span class="token punctuation">,</span> save_format<span class="token operator">=</span><span class="token string">&#x27;h5&#x27;</span><span class="token punctuation">)</span></code></pre></div><h3 id="visualizing-training-progress" style="position:relative">Visualizing training progress<a href="#visualizing-training-progress" aria-label="visualizing training progress permalink" class="gatsby-remark-autolink-header-anchor after"><svg aria-hidden="true" focusable="false" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a></h3><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python"><span class="token keyword">def</span> <span class="token function">render_training_history</span><span class="token punctuation">(</span>training_history<span class="token punctuation">)</span><span class="token punctuation">:</span>
    loss <span class="token operator">=</span> training_history<span class="token punctuation">.</span>history<span class="token punctuation">[</span><span class="token string">&#x27;loss&#x27;</span><span class="token punctuation">]</span>

    plt<span class="token punctuation">.</span>title<span class="token punctuation">(</span><span class="token string">&#x27;Loss&#x27;</span><span class="token punctuation">)</span>
    plt<span class="token punctuation">.</span>xlabel<span class="token punctuation">(</span><span class="token string">&#x27;Epoch&#x27;</span><span class="token punctuation">)</span>
    plt<span class="token punctuation">.</span>ylabel<span class="token punctuation">(</span><span class="token string">&#x27;Loss&#x27;</span><span class="token punctuation">)</span>
    plt<span class="token punctuation">.</span>plot<span class="token punctuation">(</span>loss<span class="token punctuation">,</span> label<span class="token operator">=</span><span class="token string">&#x27;Training set&#x27;</span><span class="token punctuation">)</span>
    plt<span class="token punctuation">.</span>legend<span class="token punctuation">(</span><span class="token punctuation">)</span>
    plt<span class="token punctuation">.</span>grid<span class="token punctuation">(</span>linestyle<span class="token operator">=</span><span class="token string">&#x27;--&#x27;</span><span class="token punctuation">,</span> linewidth<span class="token operator">=</span><span class="token number">1</span><span class="token punctuation">,</span> alpha<span class="token operator">=</span><span class="token number">0.5</span><span class="token punctuation">)</span>
    plt<span class="token punctuation">.</span>show<span class="token punctuation">(</span><span class="token punctuation">)</span>

render_training_history<span class="token punctuation">(</span>history<span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><p><span class="gatsby-resp-image-wrapper" style="position:relative;display:block;margin-left:auto;margin-right:auto;max-width:392px">
      <span class="gatsby-resp-image-background-image" style="padding-bottom:70.8%;position:relative;bottom:0;left:0;background-image:url(&#x27;data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAOCAYAAAAvxDzwAAAACXBIWXMAAAsSAAALEgHS3X78AAACvUlEQVQ4y5VSS08TURQ+AVwJcUXDojFxrwkBtIlNiPGHQIzxd7h0VwjQ4A+gxoW4UdtSG/ABNiUsLGpbJmmxaacPO4U+pvfembkPc6ZTKCQuvMnJ/e6ZO9/5zncPKKXg2dMnEPvwfvLbl72ZxMekL5m8jEQi4UulUtPLy8t3l5aW7qXT6WnMfd7b9aW+fvJlMhlfPp/3ZbPZmVgsNgXx+M44khZ/l563OqbJGC0TQqoYlNLqEBNCaqZp1jHHKK222t1qrXleLRaLVU3TyoVCoa9p2gtYX9+4AQAT5XJ5/bxvKSGlkl7guo6Hy3a4shyuCCGq2+2qXq+nGo3GSwiHw0g4Vq9WVtp9S/WZbUshBOdcCG8fxcOFWEoplEIN0sJihmFswNbW1gS23KjXQ1jxzLQcVMC5cJUIIS5U/gNLzrmDZ8MwwhAKhVDheKVSWcELzS51qO0oLI7tc84vfh5iDMQe8VVCv9/vBwDQdT2EF2yHO/U2UcSy0TXl/C9hMBicQg91XV8RrunS/djoEO8R5BXCYcsjGC2/JKxUKuPeK696FbFfZTJbGT2mOsQa+On5NqpQCMxd8zASibiPgh56lx3OuRSCyz5zZKfPZLNLUYWklo16UJKUgku3ykAiTsaAcG1tzX0UXddXvRFzRmcPF7EcZfQsT7HtfutSW531LTd61HZwQlqtVhg2NzeREEqlUogxphhjDu6UUsUsC3dp4ZkxaVtMNtumqhgd1eqYklCmCGXS6Jh2q2Oqeq0Whvn5+TEAuBmJRO5Eo9FHp6ens/v7+4FkMvmwXC7PxuPx4MHBQUDTTua2t98uZn8cL5yc5Od2ou8Wc9lfC8eZ7/ffvH71uFTQHqRSqdsQCARQIOzu7gJ6eXR0BIQQFx8eHrp7u93GK5MAcOvncQZyuZybLxYL0Gz+GWAtB+l0Gv4CQB+bNCNQpHgAAAAASUVORK5CYII=&#x27;);background-size:cover;display:block"></span>
  <img class="gatsby-resp-image-image" alt="Model training progress (first 10 epochs)" title="Model training progress (first 10 epochs)" src="/static/ea6a650f1d217420032f640487956e78/0acb4/6.png" srcSet="/static/ea6a650f1d217420032f640487956e78/63868/6.png 250w,/static/ea6a650f1d217420032f640487956e78/0acb4/6.png 392w" sizes="(max-width: 392px) 100vw, 392px" style="width:100%;height:100%;margin:0;vertical-align:middle;position:absolute;top:0;left:0" loading="lazy"/>
    </span></p><p>ℹ️ <em>On the chart above only first 10 epochs are presented.</em></p><p>We can see from the chart that model performance is getting better during the training. It means that the model learns to predict next characters in a way that the final sequence looks similar to some real recipe texts.</p><h2 id="generating-recipes" style="position:relative">Generating recipes<a href="#generating-recipes" aria-label="generating recipes permalink" class="gatsby-remark-autolink-header-anchor after"><svg aria-hidden="true" focusable="false" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a></h2><h3 id="restore-the-model-from-the-latest-checkpoint" style="position:relative">Restore the model from the latest checkpoint<a href="#restore-the-model-from-the-latest-checkpoint" aria-label="restore the model from the latest checkpoint permalink" class="gatsby-remark-autolink-header-anchor after"><svg aria-hidden="true" focusable="false" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a></h3><p>To keep this prediction step simple, we will restore the saved model and rebuild it with a batch size of 1. Because of the way the RNN state is passed from time-step to time-step, the model only accepts a fixed batch size once built. To run the model with a different <code class="language-text">batch_size</code>, we need to rebuild the model and restore the weights from the checkpoint.</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python">tf<span class="token punctuation">.</span>train<span class="token punctuation">.</span>latest_checkpoint<span class="token punctuation">(</span>checkpoint_dir<span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">&#x27;tmp/checkpoints/ckpt_1&#x27;</code></pre></div></blockquote><p>Let&#x27;s rebuild the model with batch size of <code class="language-text">1</code> and load trained weights to it:</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python">simplified_batch_size <span class="token operator">=</span> <span class="token number">1</span>

model_simplified <span class="token operator">=</span> build_model<span class="token punctuation">(</span>vocab_size<span class="token punctuation">,</span> embedding_dim<span class="token punctuation">,</span> rnn_units<span class="token punctuation">,</span> simplified_batch_size<span class="token punctuation">)</span>
model_simplified<span class="token punctuation">.</span>load_weights<span class="token punctuation">(</span>tf<span class="token punctuation">.</span>train<span class="token punctuation">.</span>latest_checkpoint<span class="token punctuation">(</span>checkpoint_dir<span class="token punctuation">)</span><span class="token punctuation">)</span>
model_simplified<span class="token punctuation">.</span>build<span class="token punctuation">(</span>tf<span class="token punctuation">.</span>TensorShape<span class="token punctuation">(</span><span class="token punctuation">[</span>simplified_batch_size<span class="token punctuation">,</span> <span class="token boolean">None</span><span class="token punctuation">]</span><span class="token punctuation">)</span><span class="token punctuation">)</span>

model_simplified<span class="token punctuation">.</span>summary<span class="token punctuation">(</span><span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">Model: &quot;sequential_6&quot;
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
embedding_6 (Embedding)      (1, None, 256)            45056
_________________________________________________________________
lstm_5 (LSTM)                (1, None, 1024)           5246976
_________________________________________________________________
dense_5 (Dense)              (1, None, 176)            180400
=================================================================
Total params: 5,472,432
Trainable params: 5,472,432
Non-trainable params: 0
_________________________________________________________________</code></pre></div></blockquote><p>Let&#x27;s double check that input shape is simplified:</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python">model_simplified<span class="token punctuation">.</span>input_shape</code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">(1, None)</code></pre></div></blockquote><h3 id="the-prediction-loop" style="position:relative">The prediction loop<a href="#the-prediction-loop" aria-label="the prediction loop permalink" class="gatsby-remark-autolink-header-anchor after"><svg aria-hidden="true" focusable="false" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a></h3><p>To use our trained model for recipe generation we need to implement a so-called prediction loop. The following code block generates the text using the loop:</p><ul><li>It starts by choosing a start string, initializing the RNN state and setting the number of characters to generate.</li><li>It gets the prediction distribution of the next character using the start string, and the RNN state.</li><li>Then, it uses a categorical distribution to calculate the index of the predicted character. It uses this predicted character as the next input to the model.</li><li>The RNN state returned by the model is fed back into the model so that it now has more context, instead of only one character. After predicting the next character, the modified RNN states are again fed back into the model, which is how it learns as it gets more context from the previously predicted characters.</li></ul><p><span class="gatsby-resp-image-wrapper" style="position:relative;display:block;margin-left:auto;margin-right:auto;max-width:866px">
      <span class="gatsby-resp-image-background-image" style="padding-bottom:37.99999999999999%;position:relative;bottom:0;left:0;background-image:url(&#x27;data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAICAYAAAD5nd/tAAAACXBIWXMAAAsTAAALEwEAmpwYAAACdElEQVQozyWQWU8TARSFT9lKQC1KICFo1KDhQeIDGDeCccEAEaQYUBpKAFlboGIpDVCDgdbpXmbazkwX1hYMFiy0QIuKKCDEYDThxWhifPLF/6COmXKe7s299zs5F9fOt0EuDh7VSn/Fddb6UHJhAI9r1pK7xOHUpgoG+34OvOTiYFrlTSWKRDO4c/kJOqsWRcrq94kP77K4f8uEdNEpWDXbAMdxUDZPHVc0ejIHO4IJ3D8OUvGISNXqO9kmIZNxoPi+1pnTHXWkaEgWiedvFA3erJ6m8aw2CZVUVaxGXYUWVs0HwDO6FrKNzG+5rdFt09B8vscWVdj1i5+chtBHlyXaSxGhPNoU3iV1LzfdtmjIPBTOZi0r7lHdwg5rXtllzVGJ37sMyuiLu13UAHiolStWlmlxOGZLtN1Lh2n9Wq6dnawdZbxShoicMA6sZjD2QKnVxbQw5MKlYcXqIca8XGBjXY1253Ql9TSaaejfSi672nuQxRDsq554S3Ku13odH+URJc9xvzJ9m9wgfxSqMo8BSKRCw4apDYqzhTX3+J0Rv7p4fN32Z2zdEgAgkGmV2WnXIeRrsEvWHPLFM4KcI254QgwMxPN0V5BWjkUcstL6ggQfB9j9dPl4hNaSc8RZOmyExenO9YbpYfcyKd75uQeXeS9lQD6bUZBXkoDW/vrY03lnXopeAwBOwHH5aK+zw/FGjb+xERfHBJygAmbBoEcW639zRhSm0BhsDgu8+q/CGIAHlT0oFkq7a+LPNQJd5VH09biF6k5fantZCGfAwfhOIlB1+I4sfCdQiM9oubgJlXwiVaOaTuqpD8Cr/QKXbj/G+g/HjBD8CXA9jgAAAABJRU5ErkJggg==&#x27;);background-size:cover;display:block"></span>
  <img class="gatsby-resp-image-image" alt="Prediction loop" title="Prediction loop" src="/static/47e18cbb8b1bcf3590ff8231b4978b36/c1328/7.png" srcSet="/static/47e18cbb8b1bcf3590ff8231b4978b36/63868/7.png 250w,/static/47e18cbb8b1bcf3590ff8231b4978b36/0b533/7.png 500w,/static/47e18cbb8b1bcf3590ff8231b4978b36/c1328/7.png 866w" sizes="(max-width: 866px) 100vw, 866px" style="width:100%;height:100%;margin:0;vertical-align:middle;position:absolute;top:0;left:0" loading="lazy"/>
    </span></p><blockquote><p>Image source: <a href="https://www.tensorflow.org/tutorials/text/text_generation">Text generation with an RNN</a> notebook.</p></blockquote><p>The <code class="language-text">temperature</code> parameter here defines how fuzzy or how unexpected the generated recipe is going to be. Low temperatures results in more predictable text. Higher temperatures result in more surprising text. You need to experiment to find the best setting. We will do some experimentation with different temperatures below.</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python"><span class="token keyword">def</span> <span class="token function">generate_text</span><span class="token punctuation">(</span>model<span class="token punctuation">,</span> start_string<span class="token punctuation">,</span> num_generate <span class="token operator">=</span> <span class="token number">1000</span><span class="token punctuation">,</span> temperature<span class="token operator">=</span><span class="token number">1.0</span><span class="token punctuation">)</span><span class="token punctuation">:</span>
    <span class="token comment"># Evaluation step (generating text using the learned model)</span>

    padded_start_string <span class="token operator">=</span> STOP_WORD_TITLE <span class="token operator">+</span> start_string

    <span class="token comment"># Converting our start string to numbers (vectorizing).</span>
    input_indices <span class="token operator">=</span> np<span class="token punctuation">.</span>array<span class="token punctuation">(</span>tokenizer<span class="token punctuation">.</span>texts_to_sequences<span class="token punctuation">(</span><span class="token punctuation">[</span>padded_start_string<span class="token punctuation">]</span><span class="token punctuation">)</span><span class="token punctuation">)</span>

    <span class="token comment"># Empty string to store our results.</span>
    text_generated <span class="token operator">=</span> <span class="token punctuation">[</span><span class="token punctuation">]</span>

    <span class="token comment"># Here batch size == 1.</span>
    model<span class="token punctuation">.</span>reset_states<span class="token punctuation">(</span><span class="token punctuation">)</span>
    <span class="token keyword">for</span> char_index <span class="token keyword">in</span> <span class="token builtin">range</span><span class="token punctuation">(</span>num_generate<span class="token punctuation">)</span><span class="token punctuation">:</span>
        predictions <span class="token operator">=</span> model<span class="token punctuation">(</span>input_indices<span class="token punctuation">)</span>
        <span class="token comment"># remove the batch dimension</span>
        predictions <span class="token operator">=</span> tf<span class="token punctuation">.</span>squeeze<span class="token punctuation">(</span>predictions<span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">)</span>

        <span class="token comment"># Using a categorical distribution to predict the character returned by the model.</span>
        predictions <span class="token operator">=</span> predictions <span class="token operator">/</span> temperature
        predicted_id <span class="token operator">=</span> tf<span class="token punctuation">.</span>random<span class="token punctuation">.</span>categorical<span class="token punctuation">(</span>
            predictions<span class="token punctuation">,</span>
            num_samples<span class="token operator">=</span><span class="token number">1</span>
        <span class="token punctuation">)</span><span class="token punctuation">[</span><span class="token operator">-</span><span class="token number">1</span><span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">]</span><span class="token punctuation">.</span>numpy<span class="token punctuation">(</span><span class="token punctuation">)</span>

        <span class="token comment"># We pass the predicted character as the next input to the model</span>
        <span class="token comment"># along with the previous hidden state.</span>
        input_indices <span class="token operator">=</span> tf<span class="token punctuation">.</span>expand_dims<span class="token punctuation">(</span><span class="token punctuation">[</span>predicted_id<span class="token punctuation">]</span><span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">)</span>

        next_character <span class="token operator">=</span> tokenizer<span class="token punctuation">.</span>sequences_to_texts<span class="token punctuation">(</span>input_indices<span class="token punctuation">.</span>numpy<span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span>

        text_generated<span class="token punctuation">.</span>append<span class="token punctuation">(</span>next_character<span class="token punctuation">)</span>

    <span class="token keyword">return</span> <span class="token punctuation">(</span>padded_start_string <span class="token operator">+</span> <span class="token string">&#x27;&#x27;</span><span class="token punctuation">.</span>join<span class="token punctuation">(</span>text_generated<span class="token punctuation">)</span><span class="token punctuation">)</span></code></pre></div><h3 id="figuring-out-proper-temperature-for-prediction-loop" style="position:relative">Figuring out proper temperature for prediction loop<a href="#figuring-out-proper-temperature-for-prediction-loop" aria-label="figuring out proper temperature for prediction loop permalink" class="gatsby-remark-autolink-header-anchor after"><svg aria-hidden="true" focusable="false" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a></h3><p>Now, let&#x27;s use <code class="language-text">generate_text()</code> to actually generate some new recipes. The <code class="language-text">generate_combinations()</code> function goes through all possible combinations of the first recipe letters and temperatures. It generates <code class="language-text">56</code> different combinations to help us figure out how the model performs and what temperature is better to use.</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python"><span class="token keyword">def</span> <span class="token function">generate_combinations</span><span class="token punctuation">(</span>model<span class="token punctuation">)</span><span class="token punctuation">:</span>
    recipe_length <span class="token operator">=</span> <span class="token number">1000</span>
    try_letters <span class="token operator">=</span> <span class="token punctuation">[</span><span class="token string">&#x27;&#x27;</span><span class="token punctuation">,</span> <span class="token string">&#x27;\n&#x27;</span><span class="token punctuation">,</span> <span class="token string">&#x27;A&#x27;</span><span class="token punctuation">,</span> <span class="token string">&#x27;B&#x27;</span><span class="token punctuation">,</span> <span class="token string">&#x27;C&#x27;</span><span class="token punctuation">,</span> <span class="token string">&#x27;O&#x27;</span><span class="token punctuation">,</span> <span class="token string">&#x27;L&#x27;</span><span class="token punctuation">,</span> <span class="token string">&#x27;Mushroom&#x27;</span><span class="token punctuation">,</span> <span class="token string">&#x27;Apple&#x27;</span><span class="token punctuation">,</span> <span class="token string">&#x27;Slow&#x27;</span><span class="token punctuation">,</span> <span class="token string">&#x27;Christmass&#x27;</span><span class="token punctuation">,</span> <span class="token string">&#x27;The&#x27;</span><span class="token punctuation">,</span> <span class="token string">&#x27;Banana&#x27;</span><span class="token punctuation">,</span> <span class="token string">&#x27;Homemade&#x27;</span><span class="token punctuation">]</span>
    try_temperature <span class="token operator">=</span> <span class="token punctuation">[</span><span class="token number">1.0</span><span class="token punctuation">,</span> <span class="token number">0.8</span><span class="token punctuation">,</span> <span class="token number">0.4</span><span class="token punctuation">,</span> <span class="token number">0.2</span><span class="token punctuation">]</span>

    <span class="token keyword">for</span> letter <span class="token keyword">in</span> try_letters<span class="token punctuation">:</span>
        <span class="token keyword">for</span> temperature <span class="token keyword">in</span> try_temperature<span class="token punctuation">:</span>
            generated_text <span class="token operator">=</span> generate_text<span class="token punctuation">(</span>
                model<span class="token punctuation">,</span>
                start_string<span class="token operator">=</span>letter<span class="token punctuation">,</span>
                num_generate <span class="token operator">=</span> recipe_length<span class="token punctuation">,</span>
                temperature<span class="token operator">=</span>temperature
            <span class="token punctuation">)</span>
            <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string-interpolation"><span class="token string">f&#x27;Attempt: &quot;</span><span class="token interpolation"><span class="token punctuation">{</span>letter<span class="token punctuation">}</span></span><span class="token string">&quot; + </span><span class="token interpolation"><span class="token punctuation">{</span>temperature<span class="token punctuation">}</span></span><span class="token string">&#x27;</span></span><span class="token punctuation">)</span>
            <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;-----------------------------------&#x27;</span><span class="token punctuation">)</span>
            <span class="token keyword">print</span><span class="token punctuation">(</span>generated_text<span class="token punctuation">)</span>
            <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">&#x27;\n\n&#x27;</span><span class="token punctuation">)</span>
</code></pre></div><p>To avoid making this article too long only some of those <code class="language-text">56</code> combinations will be printed below.</p><div class="gatsby-highlight" data-language="python"><pre class="language-python"><code class="language-python">generate_combinations<span class="token punctuation">(</span>model_simplified<span class="token punctuation">)</span></code></pre></div><p><em><small>➔ output:</small></em></p><blockquote><div class="gatsby-highlight" data-language="text"><pre class="language-text"><code class="language-text">Attempt: &quot;A&quot; + 1.0
-----------------------------------
📗 Azzeric Sweet Potato Puree

🥕

• 24 large baking potatoes, such as Carn or Marinara or 1 (14-ounce) can pot wine
• 1/4 pound unsalted butter, cut into small pieces
• 1/2 cup coarsely chopped scallions

📝

▪︎ Bring a large pot of water to a boil, place a large nonstick skillet over medium-high heat, add All Naucocal Volves. Reduce heat to medium and cook the potatoes until just cooked through, bubbles before adding the next layer, about 10 to 12 minutes. Remove ground beans and reserve. Reserve the crumb mixture for about 6 greased. Let cool 2 minutes. Strain soak into a glass pitcher. Let cool in ice. Add short-goodfish to the batter and stir to dissolve. Pour in the cheese mixture and whisk until smooth. Set aside for 20 seconds more. Remove dumplings and cheese curds. Spread 1/3 cup of the mixture on each circle for seal ballo. Transfer mixture into a greased 9-by-11-inch baking dish and chill for 20 minutes.
▪︎ Bake, covered, for 30 minutes. Serve warm.
␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣


Attempt: &quot;A&quot; + 0.4
-----------------------------------
📗 Apricot &quot;Cookie&quot; Cakes

🥕

• 1 cup all-purpose flour
• 1 cup corn flour
• 1 cup sugar
• 1 tablespoon baking powder
• 1 teaspoon salt
• 1 teaspoon ground cinnamon
• 1 cup grated Parmesan
• 1 cup pecans, chopped
• 1/2 cup chopped pecans
• 1/2 cup raisins

📝

▪︎ Preheat oven to 350 degrees F.
▪︎ Butter and flour a 9 by 13-inch baking dish. In a medium bowl, whisk together the flour, sugar, baking powder, baking soda and salt. In a small bowl, whisk together the eggs, sugar, and eggs. Add the flour mixture to the butter mixture and mix until just combined. Stir in the raisins and pecans and transfer to the prepared pan. Spread the batter over the top of the crust. Bake for 15 minutes. Reduce the oven temperature to 350 degrees F, and bake until the cupcakes are set and the top is golden brown, about 20 minutes more. Transfer the cake to a wire rack to cool to room temperature. Refrigerate until ready to serve.
␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣


Attempt: &quot;A&quot; + 0.2
-----------------------------------
📗 Alternative to the Fondant

🥕

• 1 cup sugar
• 1 cup water
• 1 cup heavy cream
• 1 teaspoon vanilla extract
• 1/2 cup heavy cream
• 1/2 cup heavy cream
• 1 teaspoon vanilla extract
• 1/2 cup chopped pecans

📝

▪︎ In a saucepan over medium heat, combine the sugar, sugar, and corn syrup. Cook over medium heat until the sugar is dissolved. Remove from the heat and stir in the vanilla. Refrigerate until cold. Stir in the chocolate chips and the chocolate chips. Serve immediately.
␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣


Attempt: &quot;B&quot; + 0.4
-----------------------------------
📗 Battered French Toast with Bacon, Bacon, and Caramelized Onions and Pecorino

🥕

• 1/2 pound squid (shredded carrots)
• 1 small onion, diced
• 1 small green pepper, seeded and cut into strips
• 1 red bell pepper, stemmed, seeded and cut into 1/4-inch dice
• 1 small onion, chopped
• 1 green bell pepper, chopped
• 1 cup chicken stock
• 1 cup heavy cream
• 1/2 cup shredded sharp Cheddar
• 1 teaspoon ground cumin
• 1 teaspoon salt
• 1 teaspoon freshly ground black pepper

📝

▪︎ Preheat the oven to 350 degrees F.
▪︎ For the bacon mixture: In a large bowl, combine the cheese, sour cream, mustard, salt, pepper, and hot sauce. Stir together and mix well. Fold in the milk and set aside.
▪︎ For the filling: In a large bowl, mix the flour and salt and pepper, to taste. Add the beaten eggs and mix to combine. Set aside.
▪︎ For the topping: Mix the cream cheese with the mayonnaise, salt and pepper in a medium bowl. Add the chicken and toss to coat the other side. Transfer the mixture to the prepared


Attempt: &quot;C&quot; + 1.0
-----------------------------------
📗 Crema battered Salmon

🥕

• 1 cup fresh cranberries (from 4 tablespoons left of 4 egg whites)
• 3 teaspoons sugar
• 1 tablespoon unsalted butter
• 2 tablespoons truffle oil
• Coarse salt
• Freshly ground black pepper

📝

▪︎ Place cornmeal in a small serving bowl, and combine it. Drizzle milk over the plums and season with salt and pepper. Let stand for about 5 minutes, until firm. Serve immediately.
␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣


Attempt: &quot;C&quot; + 0.8
-----------------------------------
📗 Classic Iseasteroles

🥕

• 3 cups milk
• 3/4 cup coconut milk
• 1/2 cup malted maple syrup
• 1/2 teaspoon salt
• 3 cups sugar
• 4 1-inch strawberries, sliced into 1/4-inch pieces
• 1/2 teaspoon ground cinnamon

📝

▪︎ Place the cherries in a small saucepan; sprinkle with the sugar. Bring to a simmer over medium-low heat, then remove from the heat. Let stand until the coconut fluffy, about 15 to 20 minutes. Drain the coconut oil in a stream, whisking until combined. Add the cream, espresso and cocoa powder and stir to combine. Cover and refrigerate until ready to serve. Makes 10 to 12 small springs in the same fat from the surface of the bowl, which using paper colors, and freeze overnight.
▪︎ Meanwhile, combine the cream, sugar, vanilla and salt in a medium saucepan. Cook over medium heat until the sugar dissolves and the sugar melts and begins to boil, about 5 minutes. Remove from the heat and stir in the vanilla.
▪︎ To serve, carefully remove the pops from the casserole and put them in


Attempt: &quot;C&quot; + 0.4
-----------------------------------
📗 Cinnamon Corn Cakes with Coconut Flour and Saffron Sauce

🥕

• 3 cups shredded sharp Cheddar
• 1 cup grated Parmesan
• 2 cups shredded sharp Cheddar
• 1 cup grated Parmesan
• 1 cup shredded part-skim mozzarella cheese
• 1 cup grated Parmesan
• 1 cup grated Parmesan
• 1 cup grated Parmesan
• 1 teaspoon kosher salt
• 1/2 teaspoon freshly ground black pepper

📝

▪︎ Preheat the oven to 400 degrees F. Line a baking sheet with a silpat and preheat the oven to 350 degrees F.
▪︎ In a large bowl, combine the masa harina, cumin, cayenne, and salt and pepper. Dredge the pasta in the flour and then dip in the egg mixture, then dip in the eggs, then dip in the egg mixture and then dredge in the breadcrumbs. Place the breaded cheese on a sheet tray. Bake until the crust is golden brown and the filling is bubbling, about 25 to 30 minutes. Remove from the oven and serve hot.
␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣


Attempt: &quot;L&quot; + 0.4
-----------------------------------
📗 Lighted Flan with Chocolate and Pecans

🥕

• 2 cups milk
• 1 cup sugar
• 1 teaspoon vanilla extract
• 1 cup heavy cream
• 1/2 cup heavy cream
• 1 tablespoon powdered sugar
• 1 teaspoon vanilla extract
• 1/2 cup heavy cream
• 1/2 teaspoon ground cinnamon
• 1/2 teaspoon ground nutmeg
• 1/2 cup chopped pecans

📝

▪︎ Watch how to make this recipe.
▪︎ In a small saucepan, combine the sugar, salt, and a pinch of salt. Cook over medium heat, stirring occasionally, until the sugar has dissolved. Remove from the heat and set aside to cool. Remove the cherries from the refrigerator and place in the freezer for 1 hour.
▪︎ In a blender, combine the milk, sugar, vanilla, salt and water. Blend until smooth. Pour the mixture into a 9-by-13-inch glass baking dish and set aside.
▪︎ In a small saucepan, combine the remaining 2 cups sugar, the vanilla, and 2 cups water. Bring the mixture to a boil, and then reduce the heat to low. Cook until the sugar is dissolved, about 5 minutes. Remove from the heat an


Attempt: &quot;L&quot; + 0.2
-----------------------------------
📗 Lighted Fondanta with Chocolate and Cream Cheese Frosting

🥕

• 1 cup heavy cream
• 1 tablespoon sugar
• 1 tablespoon vanilla extract
• 1 teaspoon vanilla extract
• 1 cup heavy cream
• 1 cup heavy cream
• 1/2 cup sugar
• 1 teaspoon vanilla extract
• 1 teaspoon vanilla extract
• 1/2 cup chopped pistachios

📝

▪︎ Preheat the oven to 350 degrees F.
▪︎ In a large bowl, combine the cream cheese, sugar, eggs, vanilla, and salt. Stir until smooth. Pour the mixture into the prepared baking dish. Sprinkle with the remaining 1/2 cup sugar and bake for 15 minutes. Reduce the heat to 350 degrees F and bake until the crust is golden brown, about 15 minutes more. Remove from the oven and let cool completely. Spread the chocolate chips on the parchment paper and bake until the chocolate is melted and the top is golden brown, about 10 minutes. Set aside to cool.
▪︎ In a medium bowl, whisk together the egg yolks, sugar, and vanilla until smooth. Stir in the cream and continue to beat until the chocolate


Attempt: &quot;Mushroom&quot; + 1.0
-----------------------------------
📗 Mushroom and Bacon Soup with Jumbo Sugar Coating

🥕

• 2 tablespoons vegetable oil
• 1 2/3 pounds red cabbage, shredded, about 4 cups of excess pasted dark ends of fat, and pocked or firm
• 2 red bell peppers, cored, seeded and diced
• 1 poblano pepper, chopped
• 3 medium carrots, finely chopped
• 1/2 medium pinch saffron
• 4 cups water
• 2 cups mushrooms or 1/2 cup frozen Sojo Bean red
• Salt and freshly ground black pepper
• 1 pound andouille sausage
• 1 gallon vegetable broth
• Chopped fresh parsley, cilantro leaves, for garnish

📝

▪︎ In a large Dutch oven for gas burner, heat oil over moderate heat. Add the leeks to the pot, scraping the bottom of the skillet. Add the beans and sausage and sprinkle the reserved potatoes with some orange juice cooked sausage (such as The Sauce.) Add roasted vegetables and pinto beans, mozzarella, basil and bamboo shoots. Simmer rice until soup is absorbed, 15 to 20 minutes.
▪︎ Bring another pan of water to a boil and cook shrimp for 5 minutes. While onions


Attempt: &quot;Mushroom&quot; + 0.8
-----------------------------------
📗 Mushrooms with Lentil Stewed Shallots and Tomatoes

🥕

• 1 tablespoon olive oil
• 3 cloves garlic, smashed
• Kosher salt
• 1 1/2 pounds lean ground turkey
• 1 cup coarsely peeled tart apples
• 2 tablespoons chopped garlic
• 1 teaspoon ground cumin
• 1/2 teaspoon cayenne pepper
• 1 teaspoon chopped fresh thyme
• 3/4 cup chopped fresh basil
• 1/2 small carrot, halved lengthwise and cut into 1/2-inch pieces
• 1 roasted red pepper, halved and sliced vertically diced and separated into rough chops
• 3 tablespoons unsalted butter
• 2 cups shredded mozzarella
• 1/4 cup grated parmesan cheese
• 1/4 cup prepared basil pesto

📝

▪︎ Stir the olive oil, garlic, thyme and 1 teaspoon salt in a saucepan; bring to a simmer over medium heat. Remove from the heat. Add the basil and toast the soup for 2 minutes.
▪︎ Meanwhile, heat 4 to 4 inches vegetable oil in the skillet over medium-high heat. Add the olive oil, garlic, 1/2 teaspoon salt and 1/2 teaspoon pepper and cook, stirring often, until cooked through, a


Attempt: &quot;Mushroom&quot; + 0.4
-----------------------------------
📗 Mushroom Ravioli with Chickpeas and Shiitake Mushrooms and Sun-Dried Tomatoes

🥕

• 1 pound zucchini
• 1 cup chicken broth
• 1 cup fresh basil leaves
• 1/2 cup chopped fresh basil leaves
• 1/2 cup grated Parmesan
• 1 teaspoon salt
• 1/2 teaspoon freshly ground black pepper
• 1 teaspoon chopped fresh thyme
• 1 teaspoon fresh lemon juice
• 2 cups chicken broth
• 1/2 cup grated Parmesan
• 1/2 cup grated Parmigiano-Reggiano

📝

▪︎ Preheat oven to 450 degrees F.
▪︎ Place the bread cubes in a large bowl. Add the basil, parsley, olive oil, parsley, thyme, basil, salt and pepper and toss to coat. Spread the mixture out on a baking sheet and bake until the sausages are cooked through, about 20 minutes. Serve immediately.
▪︎ In a small saucepan, bring the chicken stock to a boil. Reduce the heat to low and cook the soup until the liquid is absorbed. Remove from the heat and stir in the parsley, shallots and season with salt and pepper. Serve immediately.
␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣␣


Attempt: &quot;Mushroom&quot; + 0.2
-----------------------------------
📗 Mushroom and Spicy Sausage Stuffing

🥕

• 1 tablespoon olive oil
• 1 medium onion, chopped
• 2 cloves garlic, minced
• 1 cup frozen peas
• 1 cup frozen peas
• 1/2 cup chopped fresh parsley
• 1/2 cup grated Parmesan
• 1/2 cup grated Parmesan
• 1 teaspoon salt
• 1/2 teaspoon freshly ground black pepper
• 1 cup shredded mozzarella
• 1/2 cup grated Parmesan
• 1 cup shredded mozzarella
• 1 cup shredded mozzarella cheese

📝

▪︎ Preheat the oven to 350 degrees F.
▪︎ Bring a large pot of salted water to a boil. Add the pasta and cook until al dente, about 6 minutes. Drain and reserve.
▪︎ Meanwhile, heat the olive oil in a large skillet over medium-high heat. Add the shallots and saute until tender, about 3 minutes. Add the garlic and cook for 1 minute. Add the sausage and cook until the shallots are tender, about 3 minutes. Add the sausage and cook until tender, about 2 minutes. Add the garlic and cook, stirring, until the garlic is lightly browned, about 1 minute. Add the sausage and cook until the s</code></pre></div></blockquote><h2 id="interactive-model-demo" style="position:relative">Interactive model demo<a href="#interactive-model-demo" aria-label="interactive model demo permalink" class="gatsby-remark-autolink-header-anchor after"><svg aria-hidden="true" focusable="false" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a></h2><p>You may use 🎨 <a href="https://trekhleb.dev/machine-learning-experiments/#/experiments/RecipeGenerationRNN">Cooking recipes generator demo</a> to play around with this model, input text, and temperature parameters to generate some random recipes right in your browser.</p><p><img src="/posts-assets/4139990ec63f03c35fab15d60ea74eff/8.gif" alt="Recipe generator demo"/></p><h2 id="things-to-improve" style="position:relative">Things to improve<a href="#things-to-improve" aria-label="things to improve permalink" class="gatsby-remark-autolink-header-anchor after"><svg aria-hidden="true" focusable="false" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a></h2><p>This out of scope for this article but model still has the following issues that need to be addressed:</p><ul><li>We need to get rid of duplicates on the ingredients section.</li><li>Recipe sections (name, ingredients and cooking steps) are disconnected most of the time, meaning that we may see, let&#x27;s say, <code class="language-text">mushrooms</code> in the ingredients section, but they are not mentioned in the name of the recipe or in the cooking steps.</li></ul></article></div><div class="flex flex-row justify-center items-center mt-16"><div class="max-w-md"><div class="bg-white rounded-md shadow-md p-8"><h1 class="text-grey-darkest uppercase font-bold text-xl mb-3">Subscribe to the Newsletter</h1><p class="text-sm mb-3">Get my latest posts and project updates by email</p><form action="https://dev.us1.list-manage.com/subscribe/post?u=7714f14ff32085c685da2cfaa&amp;amp;id=53ffa81463" method="post" class="flex flex-col"><input type="text" placeholder="First Name" name="FNAME" class="border py-2 px-3 mb-3 rounded border-gray-300 border-solid appearance-none" required=""/><input type="email" placeholder="Email" name="EMAIL" class="border py-2 px-3 mb-3 rounded border-gray-300 border-solid appearance-none" required=""/><div class="hidden" aria-hidden="true"><input type="text" name="b_7714f14ff32085c685da2cfaa_53ffa81463" tabindex="-1"/></div><input type="submit" value="Subscribe" class="transition duration-200 ease-in-out bg-black text-white py-2 px-3 rounded shadow-sm cursor-pointer hover:bg-gray-800"/></form></div></div></div></article><footer class="px-6 sm:px-12 py-12"><div class="flex flex-col sm:flex-row items-center "><div style="flex:1" class="flex flex-row items-center mb-6 sm:mb-0"><a class="transition duration-200 ease-in-out flex flex-row items-center hover:text-red-600 text-xs mr-5" href="/subscribe"><svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 1024 1024" height="20" width="20" xmlns="http://www.w3.org/2000/svg"><path d="M928 160H96c-17.7 0-32 14.3-32 32v640c0 17.7 14.3 32 32 32h832c17.7 0 32-14.3 32-32V192c0-17.7-14.3-32-32-32zm-40 110.8V792H136V270.8l-27.6-21.5 39.3-50.5 42.8 33.3h643.1l42.8-33.3 39.3 50.5-27.7 21.5zM833.6 232L512 482 190.4 232l-42.8-33.3-39.3 50.5 27.6 21.5 341.6 265.6a55.99 55.99 0 0 0 68.7 0L888 270.8l27.6-21.5-39.3-50.5-42.7 33.2z"></path></svg><span class="w-2"></span>Subscribe</a><a href="https://github.com/trekhleb/trekhleb.github.io/discussions" class="transition duration-200 ease-in-out flex flex-row items-center hover:text-red-600 text-xs mr-5"><svg stroke="currentColor" fill="none" stroke-width="2" viewBox="0 0 24 24" stroke-linecap="round" stroke-linejoin="round" height="20" width="20" xmlns="http://www.w3.org/2000/svg"><path d="M9 19c-5 1.5-5-2.5-7-3m14 6v-3.87a3.37 3.37 0 0 0-.94-2.61c3.14-.35 6.44-1.54 6.44-7A5.44 5.44 0 0 0 20 4.77 5.07 5.07 0 0 0 19.91 1S18.73.65 16 2.48a13.38 13.38 0 0 0-7 0C6.27.65 5.09 1 5.09 1A5.07 5.07 0 0 0 5 4.77a5.44 5.44 0 0 0-1.5 3.78c0 5.42 3.3 6.61 6.44 7A3.37 3.37 0 0 0 9 18.13V22"></path></svg><span class="w-2"></span>Feedback</a><a class="transition duration-200 ease-in-out flex flex-row items-center hover:text-red-600 text-xs" href="/rss.xml"><svg stroke="currentColor" fill="none" stroke-width="2" viewBox="0 0 24 24" stroke-linecap="round" stroke-linejoin="round" height="20" width="20" xmlns="http://www.w3.org/2000/svg"><path d="M4 11a9 9 0 0 1 9 9"></path><path d="M4 4a16 16 0 0 1 16 16"></path><circle cx="5" cy="19" r="1"></circle></svg><span class="w-2"></span>RSS</a></div><div style="flex:1" class="flex flex-row items-center justify-center"><ul class="flex flex-row flex-wrap "><li class="flex flex-row items-center last:mr-0 mr-2 ml-2"><a href="https://www.linkedin.com/in/trekhleb/" class="transition duration-200 ease-in-out flex flex-row items-center hover:text-red-600 " title="Oleksii Trekhleb on LinkedIn"><svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 448 512" class="w-5 h-5" height="1em" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M416 32H31.9C14.3 32 0 46.5 0 64.3v383.4C0 465.5 14.3 480 31.9 480H416c17.6 0 32-14.5 32-32.3V64.3c0-17.8-14.4-32.3-32-32.3zM135.4 416H69V202.2h66.5V416zm-33.2-243c-21.3 0-38.5-17.3-38.5-38.5S80.9 96 102.2 96c21.2 0 38.5 17.3 38.5 38.5 0 21.3-17.2 38.5-38.5 38.5zm282.1 243h-66.4V312c0-24.8-.5-56.7-34.5-56.7-34.6 0-39.9 27-39.9 54.9V416h-66.4V202.2h63.7v29.2h.9c8.9-16.8 30.6-34.5 62.9-34.5 67.2 0 79.7 44.3 79.7 101.9V416z"></path></svg></a></li><li class="flex flex-row items-center last:mr-0 mr-2 ml-2"><a href="https://github.com/trekhleb" class="transition duration-200 ease-in-out flex flex-row items-center hover:text-red-600 " title="Oleksii Trekhleb on GitHub"><svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 496 512" class="w-5 h-5" height="1em" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M165.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6zm-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3zm44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9zM244.8 8C106.1 8 0 113.3 0 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C428.2 457.8 496 362.9 496 252 496 113.3 383.5 8 244.8 8zM97.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1zm-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7zm32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1zm-11.4-14.7c-1.6 1-1.6 3.6 0 5.9 1.6 2.3 4.3 3.3 5.6 2.3 1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2z"></path></svg></a></li><li class="flex flex-row items-center last:mr-0 mr-2 ml-2"><a href="https://twitter.com/Trekhleb" class="transition duration-200 ease-in-out flex flex-row items-center hover:text-red-600 " title="Oleksii Trekhleb on Twitter"><svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 512 512" class="w-5 h-5" height="1em" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M459.37 151.716c.325 4.548.325 9.097.325 13.645 0 138.72-105.583 298.558-298.558 298.558-59.452 0-114.68-17.219-161.137-47.106 8.447.974 16.568 1.299 25.34 1.299 49.055 0 94.213-16.568 130.274-44.832-46.132-.975-84.792-31.188-98.112-72.772 6.498.974 12.995 1.624 19.818 1.624 9.421 0 18.843-1.3 27.614-3.573-48.081-9.747-84.143-51.98-84.143-102.985v-1.299c13.969 7.797 30.214 12.67 47.431 13.319-28.264-18.843-46.781-51.005-46.781-87.391 0-19.492 5.197-37.36 14.294-52.954 51.655 63.675 129.3 105.258 216.365 109.807-1.624-7.797-2.599-15.918-2.599-24.04 0-57.828 46.782-104.934 104.934-104.934 30.213 0 57.502 12.67 76.67 33.137 23.715-4.548 46.456-13.32 66.599-25.34-7.798 24.366-24.366 44.833-46.132 57.827 21.117-2.273 41.584-8.122 60.426-16.243-14.292 20.791-32.161 39.308-52.628 54.253z"></path></svg></a></li></ul></div><div style="flex:1" class="hidden sm:flex"> </div></div></footer></div></main></div><div id="gatsby-announcer" style="position:absolute;top:0;width:1px;height:1px;padding:0;overflow:hidden;clip:rect(0, 0, 0, 0);white-space:nowrap;border:0" aria-live="assertive" aria-atomic="true"></div></div><script async="" src="https://www.googletagmanager.com/gtag/js?id=G-YJ73BX984Z"></script><script>
      
      
      if(true) {
        window.dataLayer = window.dataLayer || [];
        function gtag(){window.dataLayer && window.dataLayer.push(arguments);}
        gtag('js', new Date());

        gtag('config', 'G-YJ73BX984Z', {"send_page_view":false});
      }
      </script><script id="gatsby-script-loader">/*<![CDATA[*/window.pagePath="/blog/2020/recipes-generation/";/*]]>*/</script><script id="gatsby-chunk-mapping">/*<![CDATA[*/window.___chunkMapping={"polyfill":["/polyfill-b7afeec9af34d175ba4b.js"],"app":["/app-2e0826ec06cafce3bdee.js"],"component---src-pages-404-tsx":["/component---src-pages-404-tsx-63f1eb2c9d9eccd00add.js"],"component---src-pages-blog-tsx":["/component---src-pages-blog-tsx-09e5ed745cd76684f8ac.js"],"component---src-pages-index-tsx":["/component---src-pages-index-tsx-700f213869b8e0037911.js"],"component---src-pages-projects-tsx":["/component---src-pages-projects-tsx-781dc12fd2babbe9fac0.js"],"component---src-pages-subscribe-confirm-index-tsx":["/component---src-pages-subscribe-confirm-index-tsx-7d43f1b2228f03a924f8.js"],"component---src-pages-subscribe-index-tsx":["/component---src-pages-subscribe-index-tsx-c02ecb9201e3fc4b2ce6.js"],"component---src-pages-subscribe-thanks-index-tsx":["/component---src-pages-subscribe-thanks-index-tsx-cec855950644a6361532.js"],"component---src-templates-post-tsx":["/component---src-templates-post-tsx-c4045391b1a7c095d609.js"],"component---src-templates-project-tsx":["/component---src-templates-project-tsx-d7e84ca35145045f8249.js"]};/*]]>*/</script><script src="/polyfill-b7afeec9af34d175ba4b.js" nomodule=""></script><script src="/component---src-templates-post-tsx-c4045391b1a7c095d609.js" async=""></script><script src="/commons-213c962999d4e181c8a0.js" async=""></script><script src="/app-2e0826ec06cafce3bdee.js" async=""></script><script src="/framework-d63adeb7e1b44b7b8aa5.js" async=""></script><script src="/webpack-runtime-98886e4c056db07027a1.js" async=""></script></body></html>