From 5f36274c2f9e3410bfab582918dd02fcf4d8e717 Mon Sep 17 00:00:00 2001 From: Leah Garrett Date: Mon, 5 Aug 2024 22:09:38 +1000 Subject: [PATCH 1/3] faster version using strings.Fields --- wordcount/main.go | 62 ++++++++++++++++++++++++++--------------------- 1 file changed, 34 insertions(+), 28 deletions(-) diff --git a/wordcount/main.go b/wordcount/main.go index a047b27..1b4ad1b 100644 --- a/wordcount/main.go +++ b/wordcount/main.go @@ -2,41 +2,47 @@ package main import ( "fmt" - "io" - "log" "os" + "strings" "time" - "unicode" ) -func readbyte(r io.Reader) (rune, error) { - var buf [1]byte - _, err := r.Read(buf[:]) - return rune(buf[0]), err -} - func main() { - f, err := os.Open(os.Args[1]) + // This version was implemented by Copilot + + // sample main version runs: + // "moby.txt": 181239 words, duration: 585907ms + // "moby.txt": 181239 words, duration: 581428ms + // "moby.txt": 181239 words, duration: 585015ms + + // sample run on this branch + // "moby.txt": 215838 words, duration: 17211ms + // "moby.txt": 215838 words, duration: 12804ms + // "moby.txt": 215838 words, duration: 11604ms + + // Read the file + filePath := os.Args[1] + content, err := os.ReadFile(filePath) if err != nil { - log.Fatalf("could not open file %q: %v", os.Args[1], err) + fmt.Println("Error reading file:", err) + return } start := time.Now() - words := 0 - inword := false - for { - r, err := readbyte(f) - if err == io.EOF { - break - } - if err != nil { - log.Fatalf("could not read file %q: %v", os.Args[1], err) - } - if unicode.IsSpace(r) && inword { - words++ - inword = false - } - inword = unicode.IsLetter(r) - } - fmt.Printf("%q: %d words, duration: %dms\n", os.Args[1], words, time.Since(start)/1000) + + // Convert the content to string + text := string(content) + + // Count the words + wordCount := countWords(text) + + fmt.Printf("%q: %d words, duration: %dms\n", os.Args[1], wordCount, time.Since(start)/1000) +} + +func countWords(text string) int { + // Split the text into words + words := strings.Fields(text) + + // Return the count of words + return len(words) } From ef88ad2fab358071ba2e85847eb5c50e9307a0ff Mon Sep 17 00:00:00 2001 From: Leah Garrett Date: Tue, 6 Aug 2024 15:30:26 +1000 Subject: [PATCH 2/3] add profiler and update readme --- wordcount/README.md | 12 +- wordcount/cpuprofile.pprof | Bin 0 -> 1463 bytes wordcount/main.go | 19 +- wordcount/shakespeare.txt | 75508 +++++++++++++++++++++++++++++++++++ 4 files changed, 75527 insertions(+), 12 deletions(-) create mode 100644 wordcount/cpuprofile.pprof create mode 100644 wordcount/shakespeare.txt diff --git a/wordcount/README.md b/wordcount/README.md index 7e8c0cd..0627ae2 100644 --- a/wordcount/README.md +++ b/wordcount/README.md @@ -8,8 +8,16 @@ A simple timer has been used to track execution time. Provide the name of the text file as a command line argument. -`go run main.go moby.txt` +`go run main.go shakespeare.txt` Example file to use: -https://www.gutenberg.org/cache/epub/2701/pg2701.txt +https://www.gutenberg.org/ebooks/100 + +# Execution time + +For base branch: +`"shakespeare.txt": 741200 words, duration: 3422897ms` + +For this branch: +`"shakespeare.txt": 378586 words, duration: 30136ms` diff --git a/wordcount/cpuprofile.pprof b/wordcount/cpuprofile.pprof new file mode 100644 index 0000000000000000000000000000000000000000..d62c4dd999d111ce8b9e5d02d42310136e474109 GIT binary patch literal 1463 zcmV;o1xWfIiwFP!00004|D=>(Y#dh=##wv++}-th&#b-nu^rd9)6(8`nw|AciBb^% z+DJ4=5fMdH@Uoqqz22R8X72RPtew1B>O+JO2vK-IMM82VcnDDiL8MhHXfP;G%?nB^ z87U^TB9ST{z(`0G0h&AA*oK#C^R|1xdw$<{&YYR2U){NL^V)Z}|2|vFk_?QMvSb{x zXK(!D`g32`KcCyY{HLsGkPKeB_PZ@Gb0h=TvyfRXlMLiANAv7*)&w$!zy9;vTVQ6% z7|cOtxkSdGfCWle%>*)zFa7lGEikiW96+n%P{blFv8rj1EWUH|reLY8uwvGHuqG)Nxr?)*&i^6>qXmxn4= zDXxIZ}f6OZr+7rg56q*e+QvlM>XiPG{Io!Yttje@y3BhP{lwT!vYkrO0k%OoNnh z`&%h<0(Mf^1kB+aonikf)~drE^Z5cPmdd6H;C@dd5sNEKd6tE;exi}V5ZgCtHBufFq3O$YdQIwF82 zT%we{mbe3M|0dN9xRs8`fCuqGO4*A=(;x=E{%We%;I{?cZVgN{DP_MZ)L__7VWKiw z1tS?y4R`OR{Y=3JRxcp8a#&1Tuwt7hlu;Ov4v+Khy9qK1@H(9yJXzjTd)b z(Ru4|HSMPkkKiNpQTCIJiKLG2ZojVe8Ms|Bb7U57)wRhCFl4mBb}P$eG7HPNOdIS* z-89Gy-no_5AbbP$WrU-6ls4GjtZ9&0ymL3z=iseWpM%HnG1_3;Xc`3Z>ZLcd=RExI z0bM^2$M6_!u>UNY2ARX(zme(-aC=GX3(&+SZLsT0ra|U$_svv40)I>OBVeIL8|<%% zzJTxUrYp4wZ>BvjLJM0I*^e`|so{)EwW;a)%q-5$FU;3S1~xY@Zvk#@-rfSXax%j| z#tV_{hn|Rd&XI#4=HpH{;N`#$WF#CJbR&L54T9M9g%yTMt~Pz!>9^PYwh}#82K;lD z3}dS=ebG^3!+J`1o~^8&v{ss&H(QqHcEow%SdnrpH3(wY7gls3YIkk5;Rel~9tEWxIRNU!eI zf$MdJYBZIwyJze$bb}s$+LB>Bl5ePFe9(63ux%hV&K~mRpT9q=WPS^WJw9ygS>?v! zzW50!;m>`@Z^jowaaOuPEL7AC#QB(iafI}K$BlhEY}=kEoxZTU39To2ZD)5gA^5MESd(M-+{R_?2 zLEv!y#Au0)FTtI&32&~0znZp@7#G!niYc|wHzvm=C*(SbSMV+Jx7qcc6} zthJL^N%NIqPXDZ}*7zq!$QUZAxAq7muC8+5-s2m-s~i31Dg^hI~z ziT&;GNqu(O550qjp%TIm!==sULO@vm8-a@XxxSnIa3R%PuV=F=lbeotJP}t0_!VQPaf+GT(5hapWw$ov2x-#w>quXiY3kq%eUR& RzX1RM|NnEi%_;y3001