From c5302cd593eb339d0edc8c5bbf46d542211db628 Mon Sep 17 00:00:00 2001 From: Aayush Goel <81844215+Aayush-Goel-04@users.noreply.github.com> Date: Sun, 27 Aug 2023 16:36:10 +0530 Subject: [PATCH] prevalence db update --- assets/rules_prevalence.json.gz | Bin 0 -> 6056 bytes capa/render/default.py | 25 +++++++++++++------------ 2 files changed, 13 insertions(+), 12 deletions(-) create mode 100644 assets/rules_prevalence.json.gz diff --git a/assets/rules_prevalence.json.gz b/assets/rules_prevalence.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..96aba1db5e7e064a2a5932ed941cfdb736f223e8 GIT binary patch literal 6056 zcmV;Z7gy*XiwFokGwWmm|8jL~WpiI}a%Fa5Y-Mg^WiD!SZ*BmsU2SvYHj@6nzXFv{ zyS3+R;+aY1?wc&x9w*XRkvuP(+Nu;tLJ~$G!2qNzW$XU?bps$JO8^LGZ!25t*i8@w zx}Sdf1=xT8g}{HlQEbH&t8zwY)$prNGol;CtF`{-f5rfGo|oCl4s9SSP-w&4wwm;_wQ&C$*9*E*Rm}N~)4J90tC4g^+8qWd|5hZkZnX zJK1qcYAFP~-ZNcVH?3ks=ZdjPZ)9^Rqghpuf;RMg@`~k*?{HBH@~n8nj1*ZZSrX0E z4nC}rBo~}jo?#W!Qoz%b4@=)4yA^-`^fV_QVYsW&8pd%eADE>PR_ynd>4rS>W<%af z_-Vsv!IbCg6@wijHLqFlkIl1$50urRl&v@Qt9D$;3T4QS zDo!&I&?oyFVxXc11PN?e0o#)|(x*lR6AUD9go}qa3>1*B?C@;QwQtI-f{#(ujiJHl z@>Xd;199-r*^EXTu->d~*aZ;kVUbeqjx+YypLraX1-7M-&litdgc=Y!e%89>1R<(b zxzFP9lh%sX8=g}EDu$yGzagM#!Syzd>b;Z(9Ppr#*_9I3WpD5n_9gJD4Io}c%McWS zUts0wRL)kz_u9uL0WjCzc{^RB=(}L0v^BR<7JS9Q`91vOk!)bXs3XQtm^X!?*bNfi zp)*0<>Kc%r6)_cc!oGcMm@0X7B6R*fYnxJ$<8X?t!6@LjfQKPt_CK(rqGdkSzm)LW zYy8dULV>Z28o-z0-nZabiC-KJMg?XB322pschykX2jdsAaJG@Rcpl zr;>%aKqpPI0|1*ca67*RWv``bF1DsdodoxeSUiH~V1OlS+InD_P9Iye)qu1kbeha3 zH$B*f$jTW#Bn^N*Yw#;Ran-a`+{nG}dCQ|Zj& z!G7?%6}YMJQLx~QWbJC@8&F8VQ!^X|vnewlsJ9giNY`NFfPlicaAgf(TObDnl52;n z;q{0-u|>S0n*H!LNc1|k02Wp-`(xZ0d+5dJcG*4TWh5tz481=-5`PC!Q)lV`azKB< z^fOSi=u6POJj=3nO-j%J*o_cg8qeX!0&B!XNc3bHCIQ2Q zutUF6-n|_am-}?`h>=c2`@sa%h_2BL8#12Hkx5qtRYjO5BYH5zjwAPffw|z=iWV%O zq2O|HmmZ$rr=|O~ilc^eO|j29C)QS}Bh<+6_SqhI6|Byun`N#zDDh8pN45PC4B#jC zTL?HyR(jRPg#owwoC)$}IwiCyu6Q_J_$*;rKS$&|<^=DCpf9tL-x~HL4>1X_eS$6k zlvYq1Ox&~PiRas~Y<(fh@iBl^`!>`9)mlICX+FOF@)Sc8gq}~c&#?F8;gO&p@o$0w zSOj}CN5QJK59zH|eOydK6SXjH;yi*X;PodJm^z|j6w{sMoJO?K;4(@s&w`=xV*KXX z#~%>06}pm>&871x&ys_-7~q`;QmbkgPN2|=fa75WP&+hZKL7$@#wf`wHCWKP@_LLD z$CpVt7&rr_0wm=wflw%pi46QMoSKrtyv>7C*`Wq+NoG`SiCh`Q`^+-`rdjZOnD5@A zov8ph)tj~fps5^d_k0=B>;rNBAKbwXmTB$HPyo@P5yW!bG5B}OLV|xbenf7W!6Ec~ z8(#Y<0UDylV39Wm!V9oA3>1_OvYPqr4H1R<&CiaG4g{10#x-0sjH7zPgfQBI8wxOP z{BD53xY-@Lph?hb-5O_ygww0xQf`ToEl@s}c`JFYq?Rjm(+vm?3j@#~ zPtV|hd{|-paV~hBNpy<-m4Z){n2;ok&tHPx!V@!LET%vl6ADak8(H_!1BRNueKUz6 z$4g>LX~Kk_^B)6*NceX~24?;8PD0CjbOlC-Av{<~;Y|mcn4ZaFSZ@~0ixJil07e%a zwFee`IJlQ3;u^Rvjq#o^qG%_B8A0O%f=tJ3z7YwfIRVm**~NWH7lk+}UBm2HQa0@R z-4DLK)%`bHAE*6vGA-@30b{&KxI~oPM^fXF8x;p2cz|M3=x~0&3pqT&#Qw)0AW{YX zpaU3G{eM213HiL@dXpqPh6rB>PSdiqx68CaTiDg>ckjOc?dF; z2cH%qv}P?AMT9~5BelPN+X(%)&R52Asm6cNy7rPPgf#0#0RL*5gF6r9!kqVTB_ZVU z5X5j{n48~MIr6I*!iMwO-VcSpH$bC066I=7fy{v$_2XpjNb_)7JC+-5^)U@Ok#)_) z`d{sL-?6vV+WY!bc|c@Ab5?rGGHXNU!CMJTpTHx4d!>$s5}#)eCXJ_IC+x<3&R97` zER-sly#KuP6kJa7=J28+A9_U8GJc!|gVSTU z*jA<78DNmO@K~LY;V|pw;b}>}$}E8qVQ#w>kpST1XlyeI#P{~FbE05iP@uy znF7#G3i$VE&8-_#Z-Ce7n0jIKbi+VJCRWb?+67(z$sD#xqeR*214O(x3gD8d>qwrZ zka=qfAKFRu89!@^eE^awOmH{Ui#s@pxy4j+XFxuZd^OJSTlm000s;rKZW}{>hY(T) zUb|%n8=(n$fFLn2oZVaeP+;wV>5Z2nhWvXHV_Mk66#xn0V zyCbu&2-nd#-et)mfYFb7bV4f`%xz)c*E84wIFY6?XlnowTT(Lh`c0LyKV}sM$?KR{3F%sPhlWnXHBoIx13VF8314m6+~a2#Du| z73RRww)(Q`hvl>g=_VrfAp4Q3<)25(2tWuy-p` ziU?&^97l(6bR!EJ{z2s7++#4)Qu00KZM<{t!^49{wsG3m;mNHTEk&u-nN2pK*B|0c$IrtI zdn}zD^jyKInON>b-UcE;anqCeWi|~CMS3() zOn#s@i9LlqKllh3N9pw5D}MAGE`_sLj{Iqylla|r7>eMb49&!f*zF9bLQRP!lZCID zTrjFGgI^s8FfdXM#(yrNJjT*kX2__DTGn`SwE$f)uIZxmTI>GeD{> z>Sk@zMBDw`0X&Ztfo56yJ)_uDM{ck&bOAmb;Nq{do5kM-!)#2%4*_h|@@7k8O|A{U zLeHj40^qf&IL5^(QAKtLXh=#S?-y3KsqMcX$5X$`j^`ia+^IW-X!9~5=XYcV@P!*WDxS3o#qg|&~Q9xyL|^)TI3xyAU0XsrvB zI+OM!Al-#K68avCJ`b<4#}eD5Nn0U60(*%Z+t>h@f+4xo7KWb?_HF|l5UYd7g4W%f z1S(qfp|4*9USUs%Pm9;4*R_{)iV+>UZ~ilxo$ixk?XD}>^q0~hN!MC%ngqbPByh!- zh;T1c4eyth=iBM8DP>*w4gQ=MY8t6H8}$%1A#gA3ubbqVHu7 z-$n3Z1M>ZEcc7cTBSBqz^^u6Uwztj=kBW-fYc;uc=H?Rq&^-*p=J?T zWh$ipHoq6gS5F6tA4qlOm}66gT~tHvLvg{#as%Z0VlC(`kB!6R{x44pZtCJpK z`x=ZK*;SJG3dk`SU&XFEV@!PtBavOwM^Z8h`H-jS>U_r<_T!q>fsBcI%&D1$?C0SCX?mG`fZZkvsX9j~5QmcEd>fjA`jEsbnIU0!FRKnGs2psm2jC=s{7=j%LK^bejY zs#)@UpLMe6eo&B?$c$vwMRJ40FK|Nn$nVc0mHvcgAgPNqxWDwwa%)#m#sd(6c5f^R zSRhGhDBaL%ZIfr%{#Vo5H)O{!X$zqe443y5AN)RMunuI!4AIp7_8C?g3hU0_m=L!> zT8bH1oqlUUG(y9`7?v}WEd~!KG092s9V3$Ea-MqnZ18<1V)q;(jvOR9GHNrrT&>Kd zT>T${k(-2Iu_uW3t+t=fTg4#br%qSS3>LaHN*40n7l z9_F{R$@kYkrkx_bejCnHyts?VV#;(8IDc*wrcZ|0tMaDZ(fvi}3aXQ94o~sBZ>)?Q zIeM|<{T(6(O@l`#Z#%l~&0wDw`Ao?yQsC>7HeI&#MWxYRbx_;iMNbiWmHH$*`SL?e zhV9mU<()~)-OdAlgxRk7&b^U(cJCr^Qpy*DVl+O`^H+CtCn z*@-s(V67I}h*6Iv=zS47d;=fKlshSA@})tudI@m9@^$cd!Tt~{l=L?>xq^vlDPaD5 z@W4~psNAkreE-@f*Q{#MSDWsk*FN0UB?5Q!MW5}+LF~EX{Up+!u4QGgGwibdghUKy zhFTcDck+j~R~c`PjV)$!Ku2~A)^zP%lym=JZY;_MZ{-ql4a2`@qP9hX!MpuVwXD|H zFh&2>Idd#bn*tc7&{U|=C096aZ-2QlsbDKx2jH$(Aar@o$z+)l{^Ew~I_^YkM^M*l47@&;J_g=851!uNqG z+DwS*jy{>LwX)z_E=@XgU zPKR)RxFxCUZaqEz8>FJV+R5#C1Zs~#_&VLTsOO&lBLV;-DYrb~?8a#FDHiJmP)^nd zyv^Wx2$AvnvA|)wHF_OwapdIOT|}Li#tx+*+9O^ywoXf}FT3!Ln2AXSojXM!! zfjeYTL#(>5L!R83e9b~sr9mrrDC-5wL#I+ZI6b}iKlmogPwg2R|GoLAnedB4<>Bzu z+`Oj1W#AMFKtOwf?Gc2Idm&#i5zuex7( zS2KQz#foR%K1GZ9bE^)9Cd|EuJ*}KQ{_wN$4(^^76+H~$j`ei2K*PtEHK9AbkcRWB ijoj~9Q*5XW|M?`&y5|**#8!X#-~R&~As-tyX8-`OQMlp& literal 0 HcmV?d00001 diff --git a/capa/render/default.py b/capa/render/default.py index ac2c2eef5..3dac5ba2b 100644 --- a/capa/render/default.py +++ b/capa/render/default.py @@ -6,8 +6,10 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. +import gzip import json import collections +from typing import Dict from pathlib import Path import tabulate @@ -85,16 +87,18 @@ def render_capabilities(doc: rd.ResultDocument, ostream: StringIO): +-------------------------------------------------------+-------------------------------------------------+ """ - def load_rules_prevalence(file: Path) -> dict: + def load_rules_prevalence(file: Path) -> Dict[str, str]: try: - return json.load(file.open("r")) + with gzip.open(file, "rb") as gzfile: + return json.loads(gzfile.read().decode("utf-8")) except FileNotFoundError: raise FileNotFoundError(f"File '{file}' not found.") except Exception as e: raise RuntimeError(f"An error occurred while loading '{file}': {e}") subrule_matches = find_subrule_matches(doc) - rules_prevalence = load_rules_prevalence(Path("./assets/rules_prevalence.json")) + CD = Path(__file__).resolve().parent.parent.parent + rules_prevalence = load_rules_prevalence(CD / "assets" / "rules_prevalence.json.gz") # seperate rules based on their prevalence common = [] @@ -106,17 +110,14 @@ def load_rules_prevalence(file: Path) -> dict: count = len(rule.matches) matches = f"({count} matches)" if count > 1 else "" - rule_prevalence = float(rules_prevalence.get(rule.meta.name, 0)) - if rule_prevalence < 0: - raise ValueError("Match probability cannot be negative") + prevalence = rules_prevalence.get(rule.meta.name, None) - prevalences = [rutils.bold("rare"), rutils.bold("common"), "unknown"] - - if rule_prevalence == 0 or rule_prevalence >= 0.05: - prevalence = prevalences[2] if rule_prevalence == 0 else prevalences[1] - common.append((rule.meta.namespace, rule.meta.name, matches, prevalence)) + if prevalence == "rare": + rare.append((rule.meta.namespace, rule.meta.name, matches, rutils.bold(prevalence))) + elif prevalence == "common": + common.append((rule.meta.namespace, rule.meta.name, matches, rutils.bold(prevalence))) else: - rare.append((rule.meta.namespace, rule.meta.name, matches, prevalences[0])) + common.append((rule.meta.namespace, rule.meta.name, matches, "unknown")) rows = []